Named Entity Recognition pipeline


In [42]:
EXPERIMENT_NAME = 'named-entity-recognition'
BUCKET = "your-bucket-name"

Imports


In [43]:
import kfp
from kfp import compiler
import kfp.components as comp
import kfp.dsl as dsl
from kfp import gcp

Load components


In [44]:
preprocess_operation = kfp.components.load_component_from_url(
    'https://storage.googleapis.com/{}/components/preprocess/component.yaml'.format(BUCKET))
help(preprocess_operation)

train_operation = kfp.components.load_component_from_url(
    'https://storage.googleapis.com/{}/components/train/component.yaml'.format(BUCKET))
help(train_operation)

ai_platform_deploy_operation = comp.load_component_from_url(
    "https://storage.googleapis.com/{}/components/deploy/component.yaml".format(BUCKET))
help(ai_platform_deploy_operation)


Help on function preprocess:

preprocess(input_1_uri:'GCSPath', output_x_uri_template:'GCSPath', output_y_uri_template:'GCSPath', output_preprocessing_state_uri_template:'GCSPath')
    Performs the IOB preprocessing.

Help on function train:

train(input_x_uri:'GCSPath', input_y_uri:'GCSPath', input_job_dir_uri:'GCSPath', input_tags:'Integer', input_words:'Integer', input_dropout, output_model_uri_template:'GCSPath')
    Trains the NER Bi-LSTM.

Help on function deploy:

deploy(model_path:'GCSPath', model_name:'String', model_region:'String', model_version:'String', model_runtime_version:'String', model_prediction_class:'String', model_python_version:'String', model_package_uris:'String')
    Deploy the model with custom prediction route

Build the Pipeline


In [45]:
@dsl.pipeline(
  name='Named Entity Recognition Pipeline',
  description='Performs preprocessing, training and deployment.'
)
def pipeline():
    
    preprocess_task = preprocess_operation(
        input_1_uri='gs://kubeflow-examples-data/named_entity_recognition_dataset/ner.csv,
        output_y_uri_template="gs://{}/{{workflow.uid}}/preprocess/y/data".format(BUCKET),
        output_x_uri_template="gs://{}/{{workflow.uid}}/preprocess/x/data".format(BUCKET),
        output_preprocessing_state_uri_template="gs://{}/{{workflow.uid}}/model".format(BUCKET)
    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa')) 
    
    
    train_task = train_operation(
        input_x_uri=preprocess_task.outputs['output-x-uri'],
        input_y_uri=preprocess_task.outputs['output-y-uri'],
        input_job_dir_uri="gs://{}/{{workflow.uid}}/job".format(BUCKET),
        input_tags=preprocess_task.outputs['output-tags'],
        input_words=preprocess_task.outputs['output-words'],
        input_dropout=0.1,
        output_model_uri_template="gs://{}/{{workflow.uid}}/model".format(BUCKET)
    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa')) 
    
    
    deploy_task = ai_platform_deploy_operation(
        model_path= train_task.output,
        model_name="named_entity_recognition_kubeflow",
        model_region="us-central1",
        model_version="version1",
        model_runtime_version="1.13",
        model_prediction_class="model_prediction.CustomModelPrediction",
        model_python_version="3.5",
        model_package_uris="gs://{}/routine/custom_prediction_routine-0.2.tar.gz".format(BUCKET)
    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))

Compile the Pipeline


In [46]:
pipeline_func = pipeline
pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'

import kfp.compiler as compiler
compiler.Compiler().compile(pipeline_func, pipeline_filename)

Create a Kubeflow Experiment


In [47]:
client = kfp.Client()

try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except:
    experiment = client.create_experiment(EXPERIMENT_NAME)
    
print(experiment)


{'created_at': datetime.datetime(2019, 7, 5, 10, 32, 13, tzinfo=tzlocal()),
 'description': None,
 'id': '84e88563-7774-4bae-aa33-4a67649c136a',
 'name': 'named-entity-recognition'}

Run the Pipeline


In [48]:
arguments = {}

run_name = pipeline_func.__name__ + ' run'
run_result = client.run_pipeline(experiment.id, 
                                 run_name, 
                                 pipeline_filename, 
                                 arguments)

print(experiment.id)
print(run_name)
print(pipeline_filename)
print(arguments)


Run link here
84e88563-7774-4bae-aa33-4a67649c136a
pipeline run
pipeline.pipeline.zip
{}

In [ ]: