In [1]:
# Install the verta client only when importing it fails, i.e. it is not yet
# available in this environment.
# restart your notebook if prompted on Colab
try:
    import verta
except ImportError:
    # IPython shell escape: runs pip as a shell command from inside the notebook
    !pip install verta
This example features:
- scikit-learn's `LogisticRegression` model
- scikit-learn's `GridSearchCV` utility for performing grid search and cross-validation
In [2]:
# Names passed to client.set_project() / client.set_experiment() further down.
PROJECT_NAME = "Iris Multiclassification"
EXPERIMENT_NAME = "Logistic Regression"

# Address handed to the Verta Client and to DeployedModel.
HOST = "app.verta.ai"
In [3]:
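# Uncomment the lines below and fill in your Verta credentials
# if they are not already set in your environment.
# import os
# os.environ['VERTA_EMAIL'] = '<your Verta email>'
# os.environ['VERTA_DEV_KEY'] = '<your Verta developer key>'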
In [4]:
from __future__ import print_function
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import itertools
import time
import six
import numpy as np
import pandas as pd
import sklearn
from sklearn import datasets
from sklearn import linear_model
from sklearn import metrics
from sklearn import model_selection
In [5]:
# Load the iris dataset and unpack its feature matrix and target vector.
data = datasets.load_iris()
X, y = data['data'], data['target']
In [6]:
# Tabular view of the dataset: one column per feature plus the target as 'species'.
species_column = y.reshape(-1, 1)  # column vector so it can be stacked beside X
column_names = data['feature_names'] + ['species']
df = pd.DataFrame(np.hstack((X, species_column)), columns=column_names)
df.head()
In [7]:
# Hyperparameter grid handed to GridSearchCV below.
grid = {
    'C': [1e-4, 1e-3, 1e-2],
    'solver': ['lbfgs'],
    # max_iter is an iteration count: scikit-learn documents it as an int and
    # newer releases reject float values such as 1e4 during parameter validation,
    # so spell the values as integers
    'max_iter': [10**4, 10**5],
}
In [8]:
from verta import Client
from verta.utils import ModelAPI
# Create the Verta client for HOST, then select the project and the experiment
# by name. The project is set before the experiment; presumably the experiment
# is resolved within the currently active project — confirm against the verta docs.
client = Client(HOST)
proj = client.set_project(PROJECT_NAME)
expt = client.set_experiment(EXPERIMENT_NAME)
In [9]:
# Cross-validated grid search over `grid` with a logistic-regression estimator.
model = linear_model.LogisticRegression(multi_class='auto')
grid_search = model_selection.GridSearchCV(
    estimator=model,
    param_grid=grid,
    cv=5,  # number of cross-validation folds
    return_train_score=False
)
grid_search.fit(X, y)
In [10]:
# Record every hyperparameter combination tried by the grid search as its own
# experiment run.
results = pd.DataFrame(grid_search.cv_results_)

# cv_results_ holds one "split<i>_test_score" entry per fold. Take the fold count
# from the fitted search rather than hard-coding it, so this cell keeps working
# if the search is configured with a different `cv`. The list does not depend on
# the row, so it is built once outside the loop.
fold_score_keys = ["split{}_test_score".format(i)
                   for i in range(grid_search.n_splits_)]

for _, run_result in results.iterrows():
    run = client.set_experiment_run()

    # log training data
    run.log_dataset("train_data", df)

    # log hyperparameters
    run.log_hyperparameters(run_result['params'])

    # log accuracy for each validation fold
    for obs_key in fold_score_keys:
        run.log_observation("fold_acc", run_result[obs_key])

    # log summary stats of validation
    run.log_metric("val_acc_mean", run_result['mean_test_score'])
    run.log_metric("val_acc_std", run_result['std_test_score'])
In [11]:
# Take the first run after sorting by mean validation accuracy (descending)
# and report its score and hyperparameters.
runs_by_val_acc = expt.expt_runs.sort("metrics.val_acc_mean", descending=True)
best_run = runs_by_val_acc[0]
best_val_acc = best_run.get_metric("val_acc_mean")
print("Validation Accuracy: {:.4f}".format(best_val_acc))

best_hyperparams = best_run.get_hyperparameters()
print("Hyperparameters: {}".format(best_hyperparams))
In [12]:
# Retrain on the full dataset with the best run's hyperparameters.
# Values read back from the run may carry max_iter as a float (e.g. 10000.0);
# scikit-learn documents max_iter as an int, so coerce it on a copy before
# building the estimator. The copy leaves best_hyperparams itself untouched.
final_params = dict(best_hyperparams)
if 'max_iter' in final_params:
    final_params['max_iter'] = int(final_params['max_iter'])

model = linear_model.LogisticRegression(multi_class='auto', **final_params)
model.fit(X, y)
In [13]:
# Accuracy of the retrained model on the data it was fit on, logged to the best run.
train_predictions = model.predict(X)
train_acc = metrics.accuracy_score(y, train_predictions)
best_run.log_metric("train_acc", train_acc)
print("Training accuracy: {:.4f}".format(train_acc))
In [14]:
# create deployment artifacts
example_outputs = model.predict(X)
# presumably ModelAPI derives the input/output description from these examples — confirm
model_api = ModelAPI(X, example_outputs)
requirements = ["scikit-learn"]

# attach the model (with its API) and its package requirements to the best run
best_run.log_model(model, model_api=model_api)
best_run.log_requirements(requirements)
In [15]:
# bare expression: the notebook renders best_run as this cell's output
best_run
In [16]:
# NOTE(review): _demo_utils is an underscore-prefixed module of verta — confirm it
# is still provided by the installed version
from verta._demo_utils import DeployedModel
# handle built from the host and the best run's id; used below to request predictions
deployed_model = DeployedModel(HOST, best_run.id)
In [17]:
# Send the iris samples to the deployed model one at a time, pausing half a
# second between requests. itertools.cycle never runs out, so this cell keeps
# going until the kernel is interrupted.
samples = X.tolist()
for x in itertools.cycle(samples):
    prediction = deployed_model.predict([x])
    print(prediction)
    time.sleep(.5)