In [ ]:
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [ ]:
# Install the Pipeline SDK - this only needs to be run once in the environment.
!python3 -m pip install 'kfp>=0.1.31' --quiet

KubeFlow Pipelines - Container building

In this notebook, we will demo:

  • Building a container image to use as the base image for a component

Reference documentation:

Setup


In [ ]:

Create a python function


In [ ]:
def add(a: float, b: float) -> float:
    """Return the sum of the two float arguments, logging the inputs."""
    # Log which operands are being combined before returning the result.
    print("Adding two values %s and %s" %(a, b))

    result = a + b
    return result

Build a new container and use it as the base image for components.

Build and push a new container image that includes some required packages. The packages are specified in the requirements.txt file in the working directory. Use the new container image as the base image for components. Create a new component from the above function. The return value "add_op" represents a step that can be used directly in a pipeline function.


In [ ]:
from pathlib import Path

# Show the contents of requirements.txt — these packages will be installed
# into the container image built in the next cells.
requirements_text = Path('requirements.txt').read_text()
print(requirements_text)

In [ ]:
import kfp
from kfp.containers import build_image_from_working_dir
from kfp.components import func_to_container_op

In [ ]:
# Building and pushing new container image.
# NOTE(review): this builds from the packages listed in requirements.txt and
# pushes the image to a container registry — it needs registry credentials
# and network access; confirm the environment is authenticated before running.
image_with_packages = build_image_from_working_dir(
    #working_dir='.',                            # Optional. Default is the current directory
    #base_image='google/cloud-sdk:latest',       # Optional
    #image_name='gcr.io/my-org/my-image:latest', # Optional. Default is gcr.io/<project_id>/<notebook_id>/kfp_container
)

# Creating component while explicitly specifying the newly-built base image.
# add_op is a factory: calling it inside a pipeline creates a pipeline step.
add_op = func_to_container_op(add, base_image=image_with_packages)

In [ ]:
# You can also set up the image builder as the default image, so that it is
# always used by default for python components.
kfp.components.default_base_image_or_builder = build_image_from_working_dir

# Or, if you want to customize the builder, you can use a lambda:
kfp.components.default_base_image_or_builder = lambda: build_image_from_working_dir(base_image='google/cloud-sdk:latest')

# Now all python components will start using that container builder by default:
add_op2 = func_to_container_op(add)

Build a pipeline using this component


In [ ]:
import kfp.dsl as dsl

@dsl.pipeline(
   name='Calculation pipeline',
   description='A sample pipeline that performs arithmetic calculations.'
)
def calc_pipeline(
   a='1',
   b='7',
   c='17',
):
    """Sums the three pipeline parameters in two stages of add steps."""
    # Each add_op(...) call returns a dsl.ContainerOp instance; pipeline
    # parameters (or constants) passed as arguments wire it into the graph.
    sum_ab = add_op(a, b)
    sum_bc = add_op(b, c)

    # Consuming .output of the first two steps creates an implicit dependency
    # on both of them. (An explicit ordering between tasks can also be forced
    # with xyz_task.after(abc_task).)
    total = add_op(sum_ab.output, sum_bc.output)

Submit the pipeline for execution


In [ ]:
# Override the pipeline's 'a' and 'b' parameters ('c' keeps its default).
arguments = {'a': '7', 'b': '8'}
# Submit a run directly from the pipeline function.
# NOTE(review): this requires a reachable Kubeflow Pipelines endpoint;
# kfp.Client() with no arguments uses the in-cluster/default configuration.
kfp.Client().create_run_from_pipeline_func(pipeline_func=calc_pipeline, arguments=arguments)

# This should output a link that leads to the run information page.
# Note: There is a bug in JupyterLab that modifies the URL and makes the link stop working