Sample for using a transformer with the KFServing SDK

This notebook shows how to use the KFServing SDK to create an InferenceService with both a transformer and a predictor.
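
The transformer sits in front of the predictor: it pre-processes each request before the predictor sees it and can post-process the prediction on the way back. As a rough, hypothetical sketch of what a transformer image like the one used below runs (not the exact source of gcr.io/kubeflow-ci/kfserving/image-transformer), a custom transformer subclasses kfserving.KFModel and overrides preprocess and postprocess:

import kfserving
from typing import Dict

def image_to_tensor(instance):
    # Hypothetical placeholder: a real transformer would decode and
    # normalize the raw image here.
    return instance

class ImageTransformer(kfserving.KFModel):
    def __init__(self, name: str, predictor_host: str):
        super().__init__(name)
        # KFServing proxies the predict call to this host between
        # preprocess() and postprocess().
        self.predictor_host = predictor_host

    def preprocess(self, inputs: Dict) -> Dict:
        # Reshape raw instances into the layout the PyTorch model expects.
        return {'instances': [image_to_tensor(i) for i in inputs['instances']]}

    def postprocess(self, inputs: Dict) -> Dict:
        # Return the predictor's output unchanged.
        return inputs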


In [5]:
from kubernetes import client

from kfserving import KFServingClient
from kfserving import constants
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TransformerSpec
from kfserving import V1alpha2PyTorchSpec
from kfserving import V1alpha2CustomSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1Container
from kubernetes.client import V1ResourceRequirements
import kubernetes.client
import os
import requests 
import json
import numpy as np

Define InferenceService with Transformer

Add the predictor and the transformer to the endpoint spec.


In [6]:
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
default_endpoint_spec = V1alpha2EndpointSpec(
    # Predictor: PyTorch model server that loads the CIFAR-10 model from GCS.
    predictor=V1alpha2PredictorSpec(
        min_replicas=1,
        pytorch=V1alpha2PyTorchSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name='Net',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}))),
    # Transformer: custom container that pre- and post-processes each
    # request on its way to and from the predictor.
    transformer=V1alpha2TransformerSpec(
        min_replicas=1,
        custom=V1alpha2CustomSpec(
            container=V1Container(
                image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
                name='user-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'})))))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='cifar10', namespace='default'),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

Create InferenceService with Transformer

Call the KFServingClient to create the InferenceService.


In [7]:
KFServing = KFServingClient()
KFServing.create(isvc)


Out[7]:
{'apiVersion': 'serving.kubeflow.org/v1alpha2',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2019-11-13T02:19:37Z',
  'generation': 1,
  'name': 'cifar10',
  'namespace': 'default',
  'resourceVersion': '6381215',
  'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/default/inferenceservices/cifar10',
  'uid': 'b9e6b4a7-186d-4daa-aba0-18beb18563b2'},
 'spec': {'default': {'predictor': {'minReplicas': 1,
    'pytorch': {'modelClassName': 'Net',
     'resources': {'limits': {'cpu': '100m', 'memory': '1Gi'},
      'requests': {'cpu': '100m', 'memory': '1Gi'}},
     'runtimeVersion': '0.2.0',
     'storageUri': 'gs://kfserving-samples/models/pytorch/cifar10'}},
   'transformer': {'custom': {'container': {'image': 'gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
      'name': 'user-container',
      'resources': {'limits': {'cpu': '100m', 'memory': '1Gi'},
       'requests': {'cpu': '100m', 'memory': '1Gi'}}}},
    'minReplicas': 1}}},
 'status': {}}

Check the InferenceService


In [8]:
KFServing.get('cifar10', namespace='default', watch=True, timeout_seconds=120)


NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               
cifar10              False                                                                                        
cifar10              False                                                                                        
cifar10              False                                                                                        
cifar10              False                                                                                        
cifar10              True       100                             http://cifar10.default.example.com/v1/models/ci...

Run a prediction on an image


In [9]:
# Look up the external IP of the Istio ingress gateway, which fronts the
# InferenceService.
api_instance = kubernetes.client.CoreV1Api(kubernetes.client.ApiClient())
service = api_instance.read_namespaced_service("istio-ingressgateway", "istio-system", exact=True)
cluster_ip = service.status.load_balancer.ingress[0].ip
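
If the Istio ingress gateway has no external LoadBalancer IP (for example on minikube or bare metal), cluster_ip above will be None. A hedged fallback sketch, assuming the gateway exposes its HTTP port (80) as a NodePort:

if cluster_ip is None:
    # Find the node port that maps to the gateway's HTTP port.
    http_node_port = next(p.node_port for p in service.spec.ports if p.port == 80)
    # Use any node's internal IP to reach that port.
    node = api_instance.list_node().items[0]
    node_ip = next(a.address for a in node.status.addresses if a.type == 'InternalIP')
    cluster_ip = node_ip + ':' + str(http_node_port)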

In [10]:
url = "http://" + cluster_ip + "/v1/models/cifar10:predict"
# Route the request through the ingress gateway by setting the Host header
# to the InferenceService's hostname.
headers = {'Host': 'cifar10.default.example.com'}
with open('./input.json') as json_file:
    data = json.load(json_file)
print(url, headers)
response = requests.post(url, data=json.dumps(data), headers=headers)
# The predictions are raw logits; np.argmax picks the highest-scoring class.
probs = json.loads(response.content.decode('utf-8'))["predictions"]
print(probs)
print(np.argmax(probs))


http://9.21.53.162/v1/models/cifar10:predict {'Host': 'cifar10.default.example.com'}
[[-1.6099603176116943, -2.6461076736450195, 0.3284446597099304, 2.4825077056884766, 0.43524616956710815, 2.3108043670654297, 1.00056791305542, -0.4232763946056366, -0.5100947022438049, -1.797839641571045]]
3
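
The argmax index maps to the standard CIFAR-10 class order, so 3 is 'cat'. A small follow-up to print the label:

classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
print(classes[int(np.argmax(probs))])  # 'cat' for the output above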

Delete the InferenceService


In [11]:
KFServing.delete('cifar10', namespace='default')


Out[11]:
{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'cifar10',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': 'b9e6b4a7-186d-4daa-aba0-18beb18563b2'}}
