In [1]:
# Standard library: os for environment variables/paths, gmtime/strftime for timestamps.
import os
from time import gmtime, strftime
# Kubeflow Fairing and its LightGBM distributed-training helper.
from kubeflow import fairing
from kubeflow.fairing.frameworks import lightgbm
# Setting up google container repositories (GCR) for storing output containers
# You can use any docker container registry instead of GCR
# NOTE(review): guess_project_name() presumably infers the active GCP project
# from the local gcloud/metadata environment — verify for your setup.
GCP_PROJECT = fairing.cloud.gcp.guess_project_name()
# Registry path under which fairing pushes the container images it builds.
DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(GCP_PROJECT)
In [ ]:
import subprocess
# Let gcloud configure Docker credentials so images can be pushed to GCR.
subprocess.check_call(["gcloud", "auth", "configure-docker", "--quiet"])

# If a service-account key file is supplied, activate it so subsequent
# gcloud/gsutil invocations run as that service account.
# Fix: read GOOGLE_APPLICATION_CREDENTIALS once instead of calling
# os.getenv twice for the same variable (redundant lookup).
credentials_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
if credentials_file:
    subprocess.check_call(["gcloud", "auth", "activate-service-account",
                           "--key-file=" + credentials_file,
                           "--quiet"])
In [ ]:
# Creating a bucket for copying the trained model.
# You can set gcs_bucket variable to an existing bucket name if that is desired.
# NOTE(review): `gsutil mb` reports an error if the bucket already exists, so
# re-running this cell prints a failure that can normally be ignored — confirm.
gcs_bucket = "gs://{}-fairing".format(GCP_PROJECT)
!gsutil mb {gcs_bucket}
In [ ]:
# LightGBM training configuration, handed as-is to the distributed training
# job launched in the next cell.
params = {
    # Core task definition.
    "task": "train",
    "boosting_type": "gbdt",
    "objective": "regression",
    # Evaluation metric, reported every `metric_freq` iterations.
    "metric": "l2",
    "metric_freq": 1,
    # Tree growth and sampling hyperparameters.
    "num_leaves": 31,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "n_estimators": 10,
    "is_training_metric": "true",
    # Train/validation data are read directly from a public GCS path.
    "valid_data": "gs://fairing-lightgbm/regression-example/regression.test",
    "train_data": "gs://fairing-lightgbm/regression-example/regression.train",
    "verbose": 1,
    "verbose_eval": 1,
    # Timestamped output path so repeated runs do not overwrite each other.
    "model_output": "{}/lightgbm/example/model_{}.txt".format(
        gcs_bucket, strftime("%Y_%m_%d_%H_%M_%S", gmtime())),
    # Distributed training: 3 workers with feature-parallel tree learning.
    "num_machines": 3,
    "tree_learner": "feature",
}
In [ ]:
# Submit the distributed LightGBM training job via Kubeflow Fairing.
# NOTE(review): this presumably builds a container, pushes it to
# DOCKER_REGISTRY, and runs params["num_machines"] workers — confirm against
# the installed fairing version's docs.
lightgbm.execute(config=params,
                 docker_registry=DOCKER_REGISTRY,
                 cores_per_worker=2,  # Allocating 2 CPU cores per worker instance
                 memory_per_worker=0.5,  # Allocating 0.5GB of memory per worker instance
                 stream_log=True)  # presumably streams job logs into this notebook — verify
In [ ]:
url = params['model_output']
model_name = os.path.split(url)[1]
!gsutil cp {url} /tmp/{model_name}
!head /tmp/{model_name}
In [ ]:
# Configuration for a batch-prediction job over the test split, scoring with
# the model trained above.
predict_params = {
    "task": "predict",
    "metric": "l2",
    # Input data and the trained model to score with.
    "data": "gs://fairing-lightgbm/regression-example/regression.test",
    "input_model": params["model_output"],
    # NOTE(review): model_name already ends in ".txt", so this path ends in
    # ".txt.txt" — harmless, but confirm it is intentional.
    "output_result": "{}/lightgbm/example/prediction_result_{}.txt".format(
        gcs_bucket, model_name),
}
In [ ]:
# Run the batch-prediction job with default worker resources.
lightgbm.execute(
    config=predict_params,
    docker_registry=DOCKER_REGISTRY,
)
In [ ]:
url = predict_params['output_result']
file_name = os.path.split(url)[1]
!gsutil cp {url} /tmp/{file_name}
In [ ]:
# Load the predictions (one value per row, no header) and print a summary.
import pandas as pd
predictions = pd.read_csv("/tmp/{}".format(file_name), header=None)
# Column 0 holds the predicted values; aggregate it directly.
pred_col = predictions[0]
print("Prediction mean: {}, count: {}".format(pred_col.mean(), pred_col.count()))
In [ ]: