In [1]:
# restart your notebook if prompted on Colab
try:
    import verta
except ImportError:
    !pip install verta
This example features a scikit-learn LogisticRegression model.
In [2]:
HOST = "app.verta.ai"
PROJECT_NAME = "Census Income Classification"
EXPERIMENT_NAME = "Logistic Regression"
In [3]:
# uncomment and fill in your credentials to authenticate with the Verta platform
# import os
# os.environ['VERTA_EMAIL'] =
# os.environ['VERTA_DEV_KEY'] =
In [4]:
from __future__ import print_function
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
import itertools
import multiprocessing
import os
import time
import six
import numpy as np
import pandas as pd
import sklearn
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics
In [5]:
try:
    import wget
except ImportError:
    !pip install wget  # you may need pip3
    import wget
This section demonstrates logging model metadata and training artifacts to ModelDB.
In [6]:
train_data_url = "http://s3.amazonaws.com/verta-starter/census-train.csv"
train_data_filename = wget.detect_filename(train_data_url)
if not os.path.isfile(train_data_filename):
    wget.download(train_data_url)

test_data_url = "http://s3.amazonaws.com/verta-starter/census-test.csv"
test_data_filename = wget.detect_filename(test_data_url)
if not os.path.isfile(test_data_filename):
    wget.download(test_data_url)
In [7]:
df_train = pd.read_csv(train_data_filename)
X_train = df_train.iloc[:,:-1]
y_train = df_train.iloc[:, -1]
df_train.head()
In [8]:
hyperparam_candidates = {
'C': [1e-6, 1e-4],
'solver': ['lbfgs'],
'max_iter': [15, 28],
}
hyperparam_sets = [dict(zip(hyperparam_candidates.keys(), values))
                   for values
                   in itertools.product(*hyperparam_candidates.values())]
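For reference, itertools.product expands this grid into four hyperparameter sets: two values of C, one solver, and two values of max_iter.
# hyperparam_sets expands to:
# [{'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 15},
#  {'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 28},
#  {'C': 0.0001, 'solver': 'lbfgs', 'max_iter': 15},
#  {'C': 0.0001, 'solver': 'lbfgs', 'max_iter': 28}]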
In [9]:
from verta import Client
from verta.utils import ModelAPI
client = Client(HOST)
proj = client.set_project(PROJECT_NAME)
expt = client.set_experiment(EXPERIMENT_NAME)
In [10]:
def run_experiment(hyperparams):
    # create object to track experiment run
    run = client.set_experiment_run()

    # create validation split
    (X_val_train, X_val_test,
     y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,
                                                                 test_size=0.2,
                                                                 shuffle=True)

    # log hyperparameters
    run.log_hyperparameters(hyperparams)
    print(hyperparams, end=' ')

    # create and train model on the validation-train split only,
    # so the held-out split stays unseen
    model = linear_model.LogisticRegression(**hyperparams)
    model.fit(X_val_train, y_val_train)

    # calculate and log validation accuracy on the held-out split
    val_acc = model.score(X_val_test, y_val_test)
    run.log_metric("val_acc", val_acc)
    print("Validation accuracy: {:.4f}".format(val_acc))

    # create deployment artifacts
    model_api = ModelAPI(X_train, model.predict(X_train))
    requirements = ["scikit-learn"]

    # save and log model
    run.log_model(model, model_api=model_api)
    run.log_requirements(requirements)

    # log Git information as code version
    run.log_code()

# run the sweep in parallel, one worker process per hyperparameter set
pool = multiprocessing.Pool()
pool.map(run_experiment, hyperparam_sets)
pool.close()
pool.join()
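If your environment restricts multiprocessing (some hosted notebook kernels do), the sweep can run sequentially instead; the results are the same, only slower:
# sequential fallback to the parallel sweep above
for hyperparams in hyperparam_sets:
    run_experiment(hyperparams)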
This section demonstrates querying and retrieving runs via the Client.
In [11]:
best_run = expt.expt_runs.sort("metrics.val_acc", descending=True)[0]
print("Validation Accuracy: {:.4f}".format(best_run.get_metric("val_acc")))
best_hyperparams = best_run.get_hyperparameters()
print("Hyperparameters: {}".format(best_hyperparams))
In [12]:
model = linear_model.LogisticRegression(multi_class='auto', **best_hyperparams)
model.fit(X_train, y_train)
In [13]:
train_acc = model.score(X_train, y_train)
print("Training accuracy: {:.4f}".format(train_acc))
This section demonstrates model deployment and predictions, if supported by your version of ModelDB.
In [14]:
model_id = 'YOUR_MODEL_ID'  # e.g. the id of best_run found above (best_run.id)
run = client.set_experiment_run(id=model_id)
In [15]:
run.log_training_data(X_train, y_train)
In [16]:
df_test = pd.read_csv(test_data_filename)
X_test = df_test.iloc[:,:-1]
In [17]:
run.deploy(wait=True)
run
In [18]:
deployed_model = run.get_deployed_model()
# cycle through the test set indefinitely; interrupt the kernel to stop
for x in itertools.cycle(X_test.values.tolist()):
    print(deployed_model.predict([x]))
    time.sleep(.5)
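The cell above streams single-row predictions forever. To score the whole test set in one call and then tear down the endpoint, here is a sketch assuming predict() accepts a batch of inputs and that undeploy() is available in your client version:
predictions = deployed_model.predict(X_test.values.tolist())  # one batched call
run.undeploy()  # remove the live endpoint when finished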