This notebook shows how to use the KFServing SDK to create an InferenceService with a transformer and a predictor.
In [5]:
from kubernetes import client
from kfserving import KFServingClient
from kfserving import constants
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TransformerSpec
from kfserving import V1alpha2PyTorchSpec
from kfserving import V1alpha2CustomSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1Container
from kubernetes.client import V1ResourceRequirements
import kubernetes.client
import os
import requests
import json
import numpy as np
Add a predictor and a transformer to the endpoint spec
In [6]:
# API version string for the InferenceService CRD, built from the SDK constants.
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION


def make_resources():
    """Return a fresh cpu/memory spec (100m CPU, 1Gi memory) for requests and limits."""
    return V1ResourceRequirements(
        requests={'cpu': '100m', 'memory': '1Gi'},
        limits={'cpu': '100m', 'memory': '1Gi'})


# Predictor: PyTorch model server loading the CIFAR-10 model from GCS.
predictor_spec = V1alpha2PredictorSpec(
    min_replicas=1,
    pytorch=V1alpha2PyTorchSpec(
        storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
        model_class_name="Net",
        resources=make_resources()))

# Transformer: custom container image placed in front of the predictor.
transformer_spec = V1alpha2TransformerSpec(
    min_replicas=1,
    custom=V1alpha2CustomSpec(
        container=V1Container(
            image='gcr.io/kubeflow-ci/kfserving/image-transformer:latest',
            name='user-container',
            resources=make_resources())))

default_endpoint_spec = V1alpha2EndpointSpec(
    predictor=predictor_spec,
    transformer=transformer_spec)

# Assemble the InferenceService object named "cifar10" in the "default" namespace.
isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='cifar10', namespace='default'),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))
Call the KFServingClient to create the InferenceService.
In [7]:
# Instantiate the KFServing client and submit the InferenceService defined
# above to the cluster (namespace 'default', per the object's metadata).
KFServing = KFServingClient()
KFServing.create(isvc)
Out[7]:
In [8]:
# Watch the 'cifar10' InferenceService in the 'default' namespace for up to
# 120 seconds (watch=True streams status updates until the timeout elapses).
KFServing.get('cifar10', namespace='default', watch=True, timeout_seconds=120)
In [9]:
# Look up the Istio ingress gateway service to find an externally reachable address.
api_instance = kubernetes.client.CoreV1Api(kubernetes.client.ApiClient())
# `exact` is a boolean parameter — pass True, not the string 'true'.
service = api_instance.read_namespaced_service("istio-ingressgateway", "istio-system", exact=True)
# Cloud load balancers expose either a bare IP or a DNS hostname depending on
# the provider; fall back to the hostname when no IP is populated.
ingress = service.status.load_balancer.ingress[0]
cluster_ip = ingress.ip or ingress.hostname
In [10]:
# Send a prediction request through the ingress gateway; the Host header routes
# it to the cifar10 InferenceService.
url = "http://" + cluster_ip + "/v1/models/cifar10:predict"
headers = { 'Host': 'cifar10.default.example.com' }
with open('./input.json') as json_file:
    data = json.load(json_file)
print(url, headers)
# `json=` serializes the payload AND sets the Content-Type: application/json
# header, which the original data=json.dumps(...) call omitted.
response = requests.post(url, json=data, headers=headers)
response.raise_for_status()  # fail loudly instead of mis-parsing an error page
probs = response.json()["predictions"]
print(probs)
# Index of the highest-probability class.
print(np.argmax(probs))
In [11]:
# Clean up: delete the 'cifar10' InferenceService from the 'default' namespace.
KFServing.delete('cifar10', namespace='default')
Out[11]:
In [ ]: