Consume native Keras model served by TF-Serving

This notebook shows the client code needed to consume a native Keras model served by TensorFlow Serving. The TensorFlow Serving model server needs to be started with the following command:

bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server \
    --port=9000 --model_name=keras-mnist-fcn \
    --model_base_path=/home/sujit/Projects/polydlot/data/tf-export/keras-mnist-fcn
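
The client code below assumes the exported SavedModel exposes a "predict" signature with an "images" input tensor and a "scores" output tensor. For context, here is a minimal sketch of one way such an export could be produced with the TF 1.x SavedModelBuilder API; the function name export_for_serving and its version argument are illustrative, not part of the original notebook:

# Sketch: export a trained Keras model so TF-Serving can load it.
# The signature/tensor names ("predict", "images", "scores") match
# what the client code below expects.
import os
import tensorflow as tf
from keras import backend as K

def export_for_serving(model, export_dir, version=1):
    export_path = os.path.join(export_dir, str(version))
    builder = tf.saved_model.builder.SavedModelBuilder(export_path)
    # map the Keras input/output tensors to the serving signature
    signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={"images": model.input},
        outputs={"scores": model.output})
    builder.add_meta_graph_and_variables(
        sess=K.get_session(),
        tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map={"predict": signature})
    builder.save()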

In [1]:
from __future__ import division, print_function
from google.protobuf import json_format
from grpc.beta import implementations
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
import json
import os
import numpy as np
import tensorflow as tf

In [2]:
SERVER_HOST = "localhost"
SERVER_PORT = 9000

DATA_DIR = "../../data"
TEST_FILE = os.path.join(DATA_DIR, "mnist_test.csv")

IMG_SIZE = 28

MODEL_NAME = "keras-mnist-fcn"

Load Test Data


In [3]:
def parse_file(filename):
    """Parse MNIST CSV (label followed by 784 pixel values per row) into X and y."""
    xdata, ydata = [], []
    fin = open(filename, "r")
    i = 0
    for line in fin:
        if i % 10000 == 0:
            print("{:s}: {:d} lines read".format(os.path.basename(filename), i))
        cols = line.strip().split(",")
        ydata.append(int(cols[0]))
        # scale pixel values to [0, 1] and flatten to a (784,) vector
        xdata.append(np.reshape(np.array([float(x) / 255. for x in cols[1:]]),
                     (IMG_SIZE * IMG_SIZE, )))
        i += 1
    fin.close()
    print("{:s}: {:d} lines read".format(os.path.basename(filename), i))
    X = np.array(xdata, dtype="float32")
    y = np.array(ydata, dtype="int32")
    return X, y

Xtest, ytest = parse_file(TEST_FILE)
print(Xtest.shape, ytest.shape)


mnist_test.csv: 0 lines read
mnist_test.csv: 10000 lines read
(10000, 784) (10000,)

Make Predictions


In [4]:
channel = implementations.insecure_channel(SERVER_HOST, SERVER_PORT)
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
labels, predictions = [], []
for i in range(Xtest.shape[0]):
    # build a PredictRequest against the model's "predict" signature
    request = predict_pb2.PredictRequest()
    request.model_spec.name = MODEL_NAME
    request.model_spec.signature_name = "predict"

    # send one image per request, shaped (1, 784) to match the batch input
    Xbatch, ybatch = Xtest[i], ytest[i]
    request.inputs["images"].CopyFrom(
        tf.contrib.util.make_tensor_proto(Xbatch, shape=[1, Xbatch.size]))

    # second argument is the RPC timeout in seconds
    result = stub.Predict(request, 10.0)
    result_json = json.loads(json_format.MessageToJson(result))
    y_ = np.array(result_json["outputs"]["scores"]["floatVal"], dtype="float32")
    labels.append(ybatch)
    predictions.append(np.argmax(y_))
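
Parsing the response by round-tripping the protobuf through JSON works, but the scores can also be decoded directly from the returned TensorProto. Below is a sketch of that variant, which additionally sends a batch of images per request to cut down on RPC overhead; the BATCH_SIZE of 100 is an arbitrary choice, and this assumes the exported signature accepts a variable batch dimension:

# Sketch: batch multiple test images into a single Predict RPC and decode
# the response with make_ndarray instead of a JSON round-trip.
BATCH_SIZE = 100  # arbitrary illustrative value
labels, predictions = [], []
for i in range(0, Xtest.shape[0], BATCH_SIZE):
    Xbatch = Xtest[i:i + BATCH_SIZE]
    ybatch = ytest[i:i + BATCH_SIZE]

    request = predict_pb2.PredictRequest()
    request.model_spec.name = MODEL_NAME
    request.model_spec.signature_name = "predict"
    request.inputs["images"].CopyFrom(
        tf.contrib.util.make_tensor_proto(Xbatch, shape=list(Xbatch.shape)))

    result = stub.Predict(request, 10.0)
    # decode the (batch_size, num_classes) score matrix from the TensorProto
    scores = tf.contrib.util.make_ndarray(result.outputs["scores"])
    labels.extend(ybatch.tolist())
    predictions.extend(np.argmax(scores, axis=1).tolist())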

In [5]:
print("Test accuracy: {:.3f}".format(accuracy_score(labels, predictions)))
print("Confusion Matrix")
print(confusion_matrix(labels, predictions))


Test accuracy: 0.983
Confusion Matrix
[[ 969    0    0    2    0    0    2    0    4    3]
 [   0 1127    1    2    0    0    2    0    3    0]
 [   2    2 1015    3    1    0    1    6    2    0]
 [   0    0    1 1000    0    1    0    3    3    2]
 [   0    0    4    0  952    0    3    2    2   19]
 [   2    0    0    8    2  872    2    1    5    0]
 [   1    2    1    1    1    5  945    0    2    0]
 [   1    3    9    2    0    0    0 1004    3    6]
 [   0    0    2    5    0    1    0    3  959    4]
 [   1    3    0    5    6    2    1    2    1  988]]
