In [1]:
import tensorflow as tf
import shutil
import math
from datetime import datetime
import multiprocessing
from tensorflow.python.feature_column import feature_column

from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.learn import make_export_strategy

print(tf.__version__)


/Users/khalidsalama/anaconda/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)
1.4.0

Steps to use the TF Experiment APIs

  1. Define dataset metadata
  2. Define a data input function to read the data from CSV files + apply feature processing
  3. Create TF feature columns based on metadata + extended feature columns
  4. Define an estimator creation function with the required feature columns & parameters
  5. Run an Experiment with learn_runner to train, evaluate, and export the model
  6. Evaluate the model using test data
  7. Perform predictions and serve the saved model

In [2]:
MODEL_NAME = 'reg-model-02'

TRAIN_DATA_FILES_PATTERN = 'data/train-*.csv'
VALID_DATA_FILES_PATTERN = 'data/valid-*.csv'
TEST_DATA_FILES_PATTERN = 'data/test-*.csv'

RESUME_TRAINING = False
PROCESS_FEATURES = True
MULTI_THREADING = True

1. Define Dataset Metadata

  • CSV file header and defaults
  • Numeric and categorical feature names
  • Target feature name
  • Unused columns

In [3]:
HEADER = ['key','x','y','alpha','beta','target']
HEADER_DEFAULTS = [[0], [0.0], [0.0], ['NA'], ['NA'], [0.0]]

NUMERIC_FEATURE_NAMES = ['x', 'y']  

CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY = {'alpha':['ax01', 'ax02'], 'beta':['bx01', 'bx02']}
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.keys())

FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES

TARGET_NAME = 'target'

UNUSED_FEATURE_NAMES = list(set(HEADER) - set(FEATURE_NAMES) - {TARGET_NAME})

print("Header: {}".format(HEADER))
print("Numeric Features: {}".format(NUMERIC_FEATURE_NAMES))
print("Categorical Features: {}".format(CATEGORICAL_FEATURE_NAMES))
print("Target: {}".format(TARGET_NAME))
print("Unused Features: {}".format(UNUSED_FEATURE_NAMES))


Header: ['key', 'x', 'y', 'alpha', 'beta', 'target']
Numeric Features: ['x', 'y']
Categorical Features: ['alpha', 'beta']
Target: target
Unused Features: ['key']

2. Define Data Input Function

  • Input csv files name pattern
  • Use TF file queue mechanism to read files
  • Parse CSV lines to feature tensors
  • Apply feature processing
  • Return (features, target) tensors

In [4]:
def process_features(features):

    features["x_2"] = tf.square(features['x'])
    features["y_2"] = tf.square(features['y'])
    features["xy"] = tf.multiply(features['x'], features['y'])  # features['x'] * features['y']
    features['dist_xy'] = tf.sqrt(tf.squared_difference(features['x'], features['y']))  # |x - y|

    return features

def csv_input_fn(file_names, mode=tf.estimator.ModeKeys.EVAL, 
                 skip_header_lines=0, 
                 num_epochs=1, 
                 batch_size=200):
    
    shuffle = (mode == tf.estimator.ModeKeys.TRAIN)
    
    input_file_names = tf.train.match_filenames_once(pattern=file_names)

    filename_queue = tf.train.string_input_producer(
        input_file_names, num_epochs=num_epochs, shuffle=shuffle)

    reader = tf.TextLineReader(skip_header_lines=skip_header_lines)

    _, rows = reader.read_up_to(filename_queue, num_records=batch_size)
    
    row_columns = tf.expand_dims(rows, -1)
    columns = tf.decode_csv(row_columns, record_defaults=HEADER_DEFAULTS)
    features = dict(zip(HEADER, columns))
    
    # Remove unused columns
    for column in UNUSED_FEATURE_NAMES:
        features.pop(column)
    
    num_threads = 1

    if MULTI_THREADING:
        num_threads = multiprocessing.cpu_count()

    if shuffle:
        features = tf.train.shuffle_batch(
            features,
            batch_size,
            min_after_dequeue=2 * batch_size + 1,
            capacity=batch_size * 10,
            num_threads=num_threads,  # honour the MULTI_THREADING flag
            enqueue_many=True,
            allow_smaller_final_batch=True
        )
    else:
        features = tf.train.batch(
            features,
            batch_size,
            capacity=batch_size * 10,
            num_threads=num_threads,  # honour the MULTI_THREADING flag
            enqueue_many=True,
            allow_smaller_final_batch=True
        )

    target = features.pop(TARGET_NAME)
    
    
    print("")
    print("* data input_fn:")
    print("================")
    print("Input file(s): {}".format(file_names))
    print("Batch size: {}".format(batch_size))
    print("Epoch Count: {}".format(num_epochs))
    print("Mode: {}".format(mode))
    print("Thread Count: {}".format(num_threads))
    print("Shuffle: {}".format(shuffle))
    print("================")
    print("")
    
    if PROCESS_FEATURES:
        features = process_features(features)
    
    return features, target

In [5]:
features, target = csv_input_fn(file_names=TRAIN_DATA_FILES_PATTERN)
print("Feature read from CSV: {}".format(list(features.keys())))
print("Target read from CSV: {}".format(target))


* data input_fn:
================
Input file(s): data/train-*.csv
Batch size: 200
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

Features read from CSV: ['alpha', 'beta', 'x', 'y', 'x_2', 'y_2', 'xy', 'dist_xy']
Target read from CSV: Tensor("batch:2", shape=(?, 1), dtype=float32)

3. Define Feature Columns

The input numeric columns are assumed to be normalized (or to have the same scale). Otherwise, a normalizer_fn, along with the normalization params (mean and stdv, or min and max), should be passed to the tf.feature_column.numeric_column() constructor, as in the sketch below.
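
For example, a minimal sketch of a normalized numeric column (X_MEAN and X_STDV here are hypothetical placeholders, not statistics computed from this dataset):

# Hypothetical sketch: standardise 'x' with precomputed statistics.
# X_MEAN and X_STDV are placeholders, not derived from this dataset.
X_MEAN, X_STDV = 0.0, 1.0

x_column = tf.feature_column.numeric_column(
    'x',
    normalizer_fn=lambda tensor: (tensor - X_MEAN) / X_STDV
)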


In [6]:
def get_feature_columns():
    
    CONSTRUCTED_NUMERIC_FEATURES_NAMES = ['x_2', 'y_2', 'xy', 'dist_xy']
    all_numeric_feature_names = NUMERIC_FEATURE_NAMES + CONSTRUCTED_NUMERIC_FEATURES_NAMES

    numeric_columns = {feature_name: tf.feature_column.numeric_column(feature_name)
                       for feature_name in all_numeric_feature_names}

    categorical_column_with_vocabulary = \
        {key: tf.feature_column.categorical_column_with_vocabulary_list(key, vocabulary)
         for key, vocabulary in CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.items()}
        
    feature_columns = {}

    if numeric_columns is not None:
        feature_columns.update(numeric_columns)

    if categorical_column_with_vocabulary is not None:
        feature_columns.update(categorical_column_with_vocabulary) 
    
    # add extended features (crossing, bucketization, embedding)

    feature_columns['alpha_X_beta'] = tf.feature_column.crossed_column(
        [feature_columns['alpha'], feature_columns['beta']], hash_bucket_size=4)
    
    return feature_columns

feature_columns = get_feature_columns()
print("Feature Columns: {}".format(feature_columns))


Feature Columns: {'x': _NumericColumn(key='x', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'y': _NumericColumn(key='y', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'x_2': _NumericColumn(key='x_2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'y_2': _NumericColumn(key='y_2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'xy': _NumericColumn(key='xy', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'dist_xy': _NumericColumn(key='dist_xy', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), 'alpha': _VocabularyListCategoricalColumn(key='alpha', vocabulary_list=('ax01', 'ax02'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 'beta': _VocabularyListCategoricalColumn(key='beta', vocabulary_list=('bx01', 'bx02'), dtype=tf.string, default_value=-1, num_oov_buckets=0), 'alpha_X_beta': _CrossedColumn(keys=(_VocabularyListCategoricalColumn(key='alpha', vocabulary_list=('ax01', 'ax02'), dtype=tf.string, default_value=-1, num_oov_buckets=0), _VocabularyListCategoricalColumn(key='beta', vocabulary_list=('bx01', 'bx02'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), hash_bucket_size=4, hash_key=None)}

4. Define an Estimator Creation Function

  • Get dense (numeric) columns from the feature columns
  • Convert categorical columns to indicator columns
  • Instantiate a DNNRegressor estimator given the dense + indicator feature columns and parameters

In [7]:
def create_estimator(run_config, hparams):
    
    feature_columns = list(get_feature_columns().values())
    
    dense_columns = list(
        filter(lambda column: isinstance(column, feature_column._NumericColumn),
               feature_columns)
    )

    categorical_columns = list(
        filter(lambda column: isinstance(column, (feature_column._VocabularyListCategoricalColumn,
                                                  feature_column._BucketizedColumn)),
               feature_columns)
    )

    # convert categorical columns to indicator columns

    indicator_columns = list(
        map(tf.feature_column.indicator_column, categorical_columns)
    )
    
    
    estimator = tf.estimator.DNNRegressor(
        feature_columns=dense_columns + indicator_columns,
        hidden_units=hparams.hidden_units,
        optimizer=tf.train.AdamOptimizer(),
        activation_fn=tf.nn.elu,
        dropout=hparams.dropout_prob,
        config=run_config
    )
    
    print("")
    print("Estimator Type: {}".format(type(estimator)))
    print("")
    
    return estimator

5. Run Experiment

a. Define Experiment Function


In [1]:
def generate_experiment_fn(**experiment_args):

    def _experiment_fn(run_config, hparams):
        
        # EarlyStoppingHook is a custom SessionRunHook, assumed to be defined
        # elsewhere in the notebook (a hypothetical sketch follows this cell)
        validation_monitor = EarlyStoppingHook(early_stopping_rounds=hparams.early_stopping_rounds)
        
        train_input_fn = lambda: csv_input_fn(
            TRAIN_DATA_FILES_PATTERN,
            mode=tf.estimator.ModeKeys.TRAIN,
            num_epochs=hparams.num_epochs,
            batch_size=hparams.batch_size
        )

        eval_input_fn = lambda: csv_input_fn(
            VALID_DATA_FILES_PATTERN,
            mode=tf.estimator.ModeKeys.EVAL,
            num_epochs=1,
            batch_size=hparams.batch_size
            #batch_size=300
        )

        estimator = create_estimator(run_config, hparams)

        return tf.contrib.learn.Experiment(
            estimator,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            eval_steps=None,
            train_monitors=[validation_monitor],  # attach the early-stopping hook to training
            **experiment_args
        )

    return _experiment_fn
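
Note: EarlyStoppingHook is a custom class that is not defined in this section. Purely to illustrate the tf.train.SessionRunHook protocol it is assumed to follow, here is a hypothetical sketch that requests a stop after early_stopping_rounds consecutive non-improving loss observations (the notebook's actual hook may track a different metric):

# Hypothetical sketch of an early-stopping hook; not the notebook's actual implementation.
class EarlyStoppingHook(tf.train.SessionRunHook):

    def __init__(self, early_stopping_rounds=1):
        self._early_stopping_rounds = early_stopping_rounds
        self._best_loss = None
        self._bad_rounds = 0

    def begin(self):
        # Assumes the model registers its loss tensor in the LOSSES collection.
        self._loss_tensor = tf.get_collection(tf.GraphKeys.LOSSES)[0]

    def before_run(self, run_context):
        # Fetch the current loss value alongside each training step.
        return tf.train.SessionRunArgs(fetches=self._loss_tensor)

    def after_run(self, run_context, run_values):
        loss = run_values.results
        if self._best_loss is None or loss < self._best_loss:
            self._best_loss = loss
            self._bad_rounds = 0
        else:
            self._bad_rounds += 1
            if self._bad_rounds >= self._early_stopping_rounds:
                run_context.request_stop()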

b. Set HParams and RunConfig


In [10]:
hparams = tf.contrib.training.HParams(
    num_epochs=100,
    batch_size=500,
    hidden_units=[8, 4],
    early_stopping_rounds=1,
    dropout_prob=0.0)

model_dir = 'trained_models/{}'.format(MODEL_NAME)

run_config = tf.contrib.learn.RunConfig(
    save_checkpoints_steps=480,  # evaluate after every 20 epochs: (12000 / 500) * 20 = 480 steps
    tf_random_seed=19830610,
    model_dir=model_dir
)

print("Model directory: {}".format(run_config.model_dir))
print("Hyper-parameters: {}".format(hparams))
print("")


Model directory: trained_models/reg-model-02
Hyper-parameters: [('batch_size', 500), ('dropout_prob', 0.0), ('early_stopping_rounds', 1), ('hidden_units', [8, 4]), ('num_epochs', 100)]

c. Define Serving Function


In [11]:
def csv_serving_input_fn():
    
    SERVING_HEADER = ['x','y','alpha','beta']
    SERVING_HEADER_DEFAULTS = [[0.0], [0.0], ['NA'], ['NA']]

    rows_string_tensor = tf.placeholder(dtype=tf.string,
                                        shape=[None],
                                        name='csv_rows')
    
    receiver_tensor = {'csv_rows': rows_string_tensor}

    row_columns = tf.expand_dims(rows_string_tensor, -1)
    columns = tf.decode_csv(row_columns, record_defaults=SERVING_HEADER_DEFAULTS)
    features = dict(zip(SERVING_HEADER, columns))

    if PROCESS_FEATURES:
        features = process_features(features)

    return tf.estimator.export.ServingInputReceiver(
        features, receiver_tensor)

d. Run Experiment via learn_runner


In [12]:
if not RESUME_TRAINING:
    print("Removing previous artifacts...")
    shutil.rmtree(model_dir, ignore_errors=True)
else:
    print("Resuming training...") 

    
tf.logging.set_verbosity(tf.logging.INFO)

time_start = datetime.utcnow() 
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

learn_runner.run(
    experiment_fn=generate_experiment_fn(

        export_strategies=[make_export_strategy(
            csv_serving_input_fn,
            exports_to_keep=1
        )]
    ),
    run_config=run_config,
    schedule="train_and_evaluate",
    hparams=hparams
)

time_end = datetime.utcnow() 
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))


Removing previous artifacts...
Experiment started at 16:38:38
.......................................
WARNING:tensorflow:RunConfig.uid (from tensorflow.contrib.learn.python.learn.estimators.run_config) is experimental and may change or be removed at any time, and without warning.

*** EarlyStoppingHook: - Created
*** EarlyStoppingHook:: Early Stopping Rounds: 1

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1244319b0>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': 480, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'trained_models/reg-model-02'}

Estimator Type: <class 'tensorflow.python.estimator.canned.dnn.DNNRegressor'>

WARNING:tensorflow:RunConfig.uid (from tensorflow.contrib.learn.python.learn.estimators.run_config) is experimental and may change or be removed at any time, and without warning.
WARNING:tensorflow:From /Users/khalidsalama/anaconda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/monitors.py:267: BaseMonitor.__init__ (from tensorflow.contrib.learn.python.learn.monitors) is deprecated and will be removed after 2016-12-05.
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.

* data input_fn:
================
Input file(s): data/train-*.csv
Batch size: 500
Epoch Count: 100
Mode: train
Thread Count: 4
Shuffle: True
================

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into trained_models/reg-model-02/model.ckpt.

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 500
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:38:42
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-1
INFO:tensorflow:Finished evaluation at 2017-11-15-16:38:43
INFO:tensorflow:Saving dict for global step 1: average_loss = 322.521, global_step = 1, loss = 161260.0
INFO:tensorflow:Validation (step 1): average_loss = 322.521, loss = 161260.0, global_step = 1
INFO:tensorflow:loss = 152071.0, step = 1
INFO:tensorflow:global_step/sec: 49.9015
INFO:tensorflow:loss = 157382.0, step = 101 (0.572 sec)
INFO:tensorflow:global_step/sec: 181.687
INFO:tensorflow:loss = 145213.0, step = 201 (0.550 sec)
INFO:tensorflow:global_step/sec: 212.562
INFO:tensorflow:loss = 124019.0, step = 301 (0.470 sec)
INFO:tensorflow:global_step/sec: 208.804
INFO:tensorflow:loss = 144489.0, step = 401 (0.479 sec)
INFO:tensorflow:Saving checkpoints for 481 into trained_models/reg-model-02/model.ckpt.

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 500
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:38:47
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-481
INFO:tensorflow:Finished evaluation at 2017-11-15-16:38:47
INFO:tensorflow:Saving dict for global step 481: average_loss = 277.663, global_step = 481, loss = 138832.0
INFO:tensorflow:Validation (step 481): average_loss = 277.663, loss = 138832.0, global_step = 481
INFO:tensorflow:global_step/sec: 54.2012
INFO:tensorflow:loss = 139774.0, step = 501 (1.847 sec)
INFO:tensorflow:global_step/sec: 171.266
INFO:tensorflow:loss = 131535.0, step = 601 (0.583 sec)
INFO:tensorflow:global_step/sec: 153.017
INFO:tensorflow:loss = 114776.0, step = 701 (0.656 sec)
INFO:tensorflow:global_step/sec: 151.408
INFO:tensorflow:loss = 134135.0, step = 801 (0.662 sec)
INFO:tensorflow:global_step/sec: 157.898
INFO:tensorflow:loss = 108377.0, step = 901 (0.632 sec)
INFO:tensorflow:Saving checkpoints for 961 into trained_models/reg-model-02/model.ckpt.

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 500
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:38:51
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-961
INFO:tensorflow:Finished evaluation at 2017-11-15-16:38:52
INFO:tensorflow:Saving dict for global step 961: average_loss = 221.277, global_step = 961, loss = 110639.0
INFO:tensorflow:Validation (step 961): average_loss = 221.277, loss = 110639.0, global_step = 961
INFO:tensorflow:global_step/sec: 49.435
INFO:tensorflow:loss = 102264.0, step = 1001 (2.020 sec)
INFO:tensorflow:global_step/sec: 142.21
INFO:tensorflow:loss = 91063.4, step = 1101 (0.707 sec)
INFO:tensorflow:global_step/sec: 143.631
INFO:tensorflow:loss = 83967.0, step = 1201 (0.696 sec)
INFO:tensorflow:global_step/sec: 168.723
INFO:tensorflow:loss = 86682.8, step = 1301 (0.588 sec)
INFO:tensorflow:global_step/sec: 174.187
INFO:tensorflow:loss = 71982.8, step = 1401 (0.575 sec)
INFO:tensorflow:Saving checkpoints for 1441 into trained_models/reg-model-02/model.ckpt.

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 500
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:38:56
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-1441
INFO:tensorflow:Finished evaluation at 2017-11-15-16:38:56
INFO:tensorflow:Saving dict for global step 1441: average_loss = 165.102, global_step = 1441, loss = 82551.2
INFO:tensorflow:Validation (step 1441): average_loss = 165.102, loss = 82551.2, global_step = 1441
INFO:tensorflow:global_step/sec: 52.7557
INFO:tensorflow:loss = 84999.6, step = 1501 (1.896 sec)
INFO:tensorflow:global_step/sec: 162.651
INFO:tensorflow:loss = 62800.4, step = 1601 (0.615 sec)
INFO:tensorflow:global_step/sec: 133.662
INFO:tensorflow:loss = 75581.0, step = 1701 (0.749 sec)
INFO:tensorflow:global_step/sec: 146.833
INFO:tensorflow:loss = 63824.4, step = 1801 (0.681 sec)
INFO:tensorflow:global_step/sec: 174.418
INFO:tensorflow:loss = 63489.4, step = 1901 (0.572 sec)
INFO:tensorflow:Saving checkpoints for 1921 into trained_models/reg-model-02/model.ckpt.

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 500
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:39:00
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-1921
INFO:tensorflow:Finished evaluation at 2017-11-15-16:39:01
INFO:tensorflow:Saving dict for global step 1921: average_loss = 138.061, global_step = 1921, loss = 69030.5
INFO:tensorflow:Validation (step 1921): average_loss = 138.061, loss = 69030.5, global_step = 1921
INFO:tensorflow:global_step/sec: 40.8547
INFO:tensorflow:loss = 59664.1, step = 2001 (2.456 sec)
INFO:tensorflow:global_step/sec: 161.944
INFO:tensorflow:loss = 70233.4, step = 2101 (0.608 sec)
INFO:tensorflow:global_step/sec: 172.256
INFO:tensorflow:loss = 64989.5, step = 2201 (0.581 sec)
INFO:tensorflow:global_step/sec: 99.7086
INFO:tensorflow:loss = 65060.5, step = 2301 (1.006 sec)
INFO:tensorflow:Saving checkpoints for 2400 into trained_models/reg-model-02/model.ckpt.
INFO:tensorflow:Loss for final step: 56258.0.

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 500
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:39:05
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-2400
INFO:tensorflow:Finished evaluation at 2017-11-15-16:39:06
INFO:tensorflow:Saving dict for global step 2400: average_loss = 121.123, global_step = 2400, loss = 60561.4
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-2400
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"trained_models/reg-model-02/export/Servo/temp-b'1510763946'/saved_model.pb"
.......................................
Experiment finished at 16:39:07

Experiment elapsed time: 28.494065 seconds

6. Evaluate the Model


In [13]:
TRAIN_SIZE = 12000
VALID_SIZE = 3000
TEST_SIZE = 5000

train_input_fn = lambda: csv_input_fn(file_names=TRAIN_DATA_FILES_PATTERN,
                                      mode=tf.estimator.ModeKeys.EVAL,
                                      batch_size=TRAIN_SIZE)

valid_input_fn = lambda: csv_input_fn(file_names=VALID_DATA_FILES_PATTERN,
                                      mode=tf.estimator.ModeKeys.EVAL,
                                      batch_size=VALID_SIZE)

test_input_fn = lambda: csv_input_fn(file_names=TEST_DATA_FILES_PATTERN,
                                     mode=tf.estimator.ModeKeys.EVAL,
                                     batch_size=TEST_SIZE)

estimator = create_estimator(run_config, hparams)

train_results = estimator.evaluate(input_fn=train_input_fn)
train_rmse = round(math.sqrt(train_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Train RMSE: {} - {}".format(train_rmse, train_results))
print("############################################################################################")

valid_results = estimator.evaluate(input_fn=valid_input_fn, steps=1)
valid_rmse = round(math.sqrt(valid_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Valid RMSE: {} - {}".format(valid_rmse,valid_results))
print("############################################################################################")

test_results = estimator.evaluate(input_fn=test_input_fn, steps=1)
test_rmse = round(math.sqrt(test_results["average_loss"]),5)
print()
print("############################################################################################")
print("# Test RMSE: {} - {}".format(test_rmse, test_results))
print("############################################################################################")


INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1244319b0>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': 19830610, '_save_summary_steps': 100, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': 480, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'trained_models/reg-model-02'}

Estimator Type: <class 'tensorflow.python.estimator.canned.dnn.DNNRegressor'>


* data input_fn:
================
Input file(s): data/train-*.csv
Batch size: 12000
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:39:07
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-2400
INFO:tensorflow:Finished evaluation at 2017-11-15-16:39:08
INFO:tensorflow:Saving dict for global step 2400: average_loss = 110.983, global_step = 2400, loss = 1.3318e+06

############################################################################################
# Train RMSE: 10.53484 - {'average_loss': 110.98292, 'loss': 1331795.0, 'global_step': 2400}
############################################################################################

* data input_fn:
================
Input file(s): data/valid-*.csv
Batch size: 3000
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:39:08
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-2400
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-11-15-16:39:09
INFO:tensorflow:Saving dict for global step 2400: average_loss = 121.123, global_step = 2400, loss = 363369.0

############################################################################################
# Valid RMSE: 11.00558 - {'average_loss': 121.12289, 'loss': 363368.69, 'global_step': 2400}
############################################################################################

* data input_fn:
================
Input file(s): data/test-*.csv
Batch size: 5000
Epoch Count: 1
Mode: eval
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Starting evaluation at 2017-11-15-16:39:09
INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-2400
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-11-15-16:39:10
INFO:tensorflow:Saving dict for global step 2400: average_loss = 126.371, global_step = 2400, loss = 631856.0

############################################################################################
# Test RMSE: 11.24149 - {'average_loss': 126.3711, 'loss': 631855.5, 'global_step': 2400}
############################################################################################

7. Prediction


In [14]:
import itertools

predict_input_fn = lambda: csv_input_fn(file_names=TEST_DATA_FILES_PATTERN,
                                        mode=tf.estimator.ModeKeys.PREDICT,
                                        batch_size=5)

predictions = estimator.predict(input_fn=predict_input_fn)
values = list(map(lambda item: item["predictions"][0], itertools.islice(predictions, 5)))
print()
print("Predicted Values: {}".format(values))


* data input_fn:
================
Input file(s): data/test-*.csv
Batch size: 5
Epoch Count: 1
Mode: infer
Thread Count: 4
Shuffle: False
================

INFO:tensorflow:Restoring parameters from trained_models/reg-model-02/model.ckpt-2400

Predicted Values: [47.768089, -8.4256477, 6.3064356, 2.6255383, 2.9390121]

Serving via the Saved Model


In [15]:
import os

export_dir = model_dir + "/export/Servo/"

# pick the latest export (export directories are named by a timestamp)
saved_model_dir = export_dir + "/" + sorted(os.listdir(path=export_dir))[-1]

print(saved_model_dir)

predictor_fn = tf.contrib.predictor.from_saved_model(
    export_dir=saved_model_dir,
    signature_def_key="predict"
)

output = predictor_fn({'csv_rows': ["0.5,1,ax01,bx02", "-0.5,-1,ax02,bx02"]})
print(output)


trained_models/reg-model-02/export/Servo//1510763946
INFO:tensorflow:Restoring parameters from b'trained_models/reg-model-02/export/Servo//1510763946/variables/variables'
{'predictions': array([[ 55.06306076],
       [-16.95091438]], dtype=float32)}

What can we improve?

  • Use Dataset APIs - the Dataset APIs standardise and simplify building the data input pipeline (see the first sketch below).
  • Use a JSON Serving Function - the serving function of the exported model can receive JSON data instead of raw CSV rows (see the second sketch below).
  • Use .tfrecords files instead of CSV - TFRecord files are optimised for TensorFlow.
  • Build a Custom Estimator - the custom Estimator APIs give you the flexibility to build custom models in a simple and standard way.
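
As a taste of the first improvement, here is a minimal sketch of a Dataset-based input function built from the metadata and process_features defined above (a simplified stand-in, not a tested drop-in replacement for csv_input_fn):

def parse_csv_row(csv_row):
    # Parse a single CSV line into a feature dict plus target.
    columns = tf.decode_csv(csv_row, record_defaults=HEADER_DEFAULTS)
    features = dict(zip(HEADER, columns))
    for column in UNUSED_FEATURE_NAMES:
        features.pop(column)
    target = features.pop(TARGET_NAME)
    return features, target

def dataset_input_fn(file_names_pattern,
                     mode=tf.estimator.ModeKeys.EVAL,
                     num_epochs=1,
                     batch_size=200):

    shuffle = (mode == tf.estimator.ModeKeys.TRAIN)

    file_names = tf.gfile.Glob(file_names_pattern)
    dataset = tf.data.TextLineDataset(file_names)
    dataset = dataset.map(parse_csv_row)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)

    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs)

    features, target = dataset.make_one_shot_iterator().get_next()

    if PROCESS_FEATURES:
        features = process_features(features)

    return features, target

The declarative map/shuffle/batch/repeat chain replaces the queue-based plumbing (string_input_producer, TextLineReader, shuffle_batch) used in csv_input_fn.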
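
And a sketch of a JSON-friendly serving function: each feature gets its own placeholder, so the exported signature accepts one JSON value per feature (e.g. {"x": 0.5, "y": 1.0, "alpha": "ax01", "beta": "bx02"}) rather than a raw CSV row:

def json_serving_input_fn():

    receiver_tensor = {}

    for feature_name in FEATURE_NAMES:
        dtype = tf.float32 if feature_name in NUMERIC_FEATURE_NAMES else tf.string
        receiver_tensor[feature_name] = tf.placeholder(shape=[None], dtype=dtype)

    # Expand each (batch,) input to (batch, 1), as the feature columns expect.
    features = {key: tf.expand_dims(tensor, -1)
                for key, tensor in receiver_tensor.items()}

    if PROCESS_FEATURES:
        features = process_features(features)

    return tf.estimator.export.ServingInputReceiver(
        features, receiver_tensor)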

In [ ]: