Development has moved

The most current version of this file is at: https://github.com/mdda/cnn-speech-mnist


In [ ]:

Train a CNN to Recognise Words

The stamping process in the '_Data' notebook has produced some nice-looking spectrograms with a uniform (64, 32) shape.

Let's recognise the words the stamps represent by learning to differentiate between the 'stamp' images : a task for which the MNIST CNN is almost perfect...


In [ ]:
"""Convolutional Neural Network Estimator, built with tf.layers (originally for MNIST)."""

#  FROM : https://www.tensorflow.org/tutorials/layers#building_the_cnn_mnist_classifier
#  CODE : https://www.tensorflow.org/code/tensorflow/examples/tutorials/layers/cnn_mnist.py

import os

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pickle

import tensorflow as tf

from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
from tensorflow.contrib.learn.python.learn.estimators import run_config

tf.logging.set_verbosity(tf.logging.INFO)   # Quite verbose...
#tf.logging.set_verbosity(tf.logging.WARN)  # Use this instead to suppress most of the logging

do_training = True

In [ ]:
import sys
print(sys.version)
print('Tensorflow:',tf.__version__)

Expecting:

3.5.2 (default, Sep 14 2016, 11:28:32) 
[GCC 6.2.1 20160901 (Red Hat 6.2.1-1)]
Tensorflow: 1.0.0

In [ ]:
prefix='num'

In [ ]:
# Load training and validation data
dataset = pickle.load(open(os.path.join('data', prefix+'.pkl'), 'rb'))

train_indices = [ i for i,r in enumerate(dataset['rand']) if r<=0.9 ]
check_indices = [ i for i,r in enumerate(dataset['rand']) if r>0.9 ]

print("Training and Validation(='check_') data loaded, %d items total " % (len(dataset['stamp']),))

In [ ]:
num_of_classes = 10

In [ ]:
def cnn_model_fn(features, integer_labels, mode):
    """Model function for CNN."""

    features_images=features['images']

    input_layer = tf.reshape(features_images, [-1, 64, 32, 1], name='input_layer')

    # Convolutional Layer #1 (5x5 kernels)
    conv1 = tf.layers.conv2d( inputs=input_layer,
      filters=16, kernel_size=[5, 5], padding="same",
      activation=tf.nn.relu)

    # First max pooling layer with a 2x2 filter and stride of 2
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2 (5x5 kernels)
    conv2 = tf.layers.conv2d( inputs=pool1,
      filters=16, kernel_size=[5, 5], padding="same",
      activation=tf.nn.relu)

    # Pooling Layer #2 (2x2 filter and stride of 2)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten tensor into a batch of vectors
    pool2_flat = tf.contrib.layers.flatten(pool2)

    # Dense Layer
    dense = tf.layers.dense(inputs=pool2_flat, units=32, activation=tf.nn.relu)

    # Add dropout operation; rate=0.5 means each element is dropped with probability 0.5 during training
    dropout = tf.layers.dropout( inputs=dense, rate=0.5, training=(mode == learn.ModeKeys.TRAIN) )

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=num_of_classes)
    #logits = tf.Print(logits, [input_layer.get_shape(), integer_labels.get_shape()], 
    #           "Debug size information : ", first_n=1)

    loss = None
    train_op = None

    # Calculate Loss (for both TRAIN and EVAL modes)
    if mode != learn.ModeKeys.INFER:
        onehot_labels = tf.one_hot(indices=tf.cast(integer_labels, tf.int32), depth=num_of_classes)
        loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=onehot_labels)

    # Configure the Training Op (for TRAIN mode)
    if mode == learn.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss( loss=loss,
                          global_step=tf.contrib.framework.get_global_step(),
                          learning_rate=0.001, optimizer="Adam")

    # Generate Predictions
    predictions = {
        "classes":       tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"), 
        "logits":        logits,
    }

    # Return a ModelFnOps object
    return model_fn_lib.ModelFnOps( mode=mode, predictions=predictions, loss=loss, train_op=train_op)
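
As a reference for the layer sizes above (plain arithmetic only; it just follows the 'same' padding and 2x2 pooling used in cnn_model_fn):

In [ ]:
# Shape walk-through for cnn_model_fn : (64, 32, 1) input stamps
h, w, c = 64, 32, 1
h, w, c = h // 2, w // 2, 16   # conv1 ('same' padding) + 2x2 max-pool -> (32, 16, 16)
h, w, c = h // 2, w // 2, 16   # conv2 ('same' padding) + 2x2 max-pool -> (16,  8, 16)
print("pool2 shape : (%d, %d, %d) -> %d features after flattening" % (h, w, c, h * w * c))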

In [ ]:
# Remove any previously saved model so the Estimator trains from scratch
! rm -rf cnn_model

In [ ]:
# Create the Estimator : https://www.tensorflow.org/extend/estimators
tf_random_seed = 100
config = run_config.RunConfig(tf_random_seed=tf_random_seed)

cnn_classifier = learn.Estimator(
    model_fn=cnn_model_fn, 
    model_dir="cnn_model/"+prefix, # This is relative to the ipynb
    config=config)

In [ ]:
def batch_input_fn(dataset, indices, batch_size=100, seed=None, num_epochs=1):  
    # If seed is defined, this will shuffle data into batches

    # Get the data into tensorflow
    stamps = np.array( dataset['stamp'] )[indices]
    print("stamps.shape:", stamps.shape)
    labels = np.array( dataset['label'] )[indices]
    print("labels.shape:", labels.shape)
    
    # Ensure that the stamps are 'float32' in [0,1] and have a trailing channel dimension of 1
    stamps_with_channel = np.expand_dims( stamps / 255.0, -1)

    all_images = tf.constant( stamps_with_channel, shape=stamps_with_channel.shape, dtype=tf.float32 )
    all_labels = tf.constant( labels, shape=labels.shape, verify_shape=True )
    
    print("batch_input_fn sizing : ", all_images.shape, )
    
    if True:  # This is if the number of examples is large enough to warrant batching...
        # And create a 'feeder' to batch up the data appropriately...
        image, label = tf.train.slice_input_producer( [ all_images, all_labels ], 
                                               num_epochs=num_epochs,
                                               shuffle=(seed is not None), seed=seed,
                                             )

        dataset_dict = dict( images=image, labels=label ) # This becomes pluralized into batches by .batch()

        batch_dict = tf.train.batch( dataset_dict, batch_size,
                                    num_threads=1, capacity=batch_size*2, 
                                    enqueue_many=False, shapes=None, dynamic_pad=False, 
                                    allow_smaller_final_batch=False, 
                                    shared_name=None, name=None)

        batch_labels = batch_dict.pop('labels')
    
    if False:  # Alternative : feed the whole split as a single, unshuffled batch
        batch_dict = dict( images=all_images )
        batch_labels = all_labels
    
    # Return : 
    # 1) a mapping of feature columns to Tensors with the corresponding feature data, and 
    # 2) the corresponding labels
    return batch_dict, batch_labels

batch_size = 20
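
batch_input_fn is built on TF 1.x input queues, so it only yields data once queue runners are started; the Estimator does this internally during fit/evaluate/predict. Purely for inspection, a single batch could be pulled out by hand as in the sketch below (optional, and assuming the TF 1.x queue-runner API):

In [ ]:
# Optional : pull one batch by hand to check shapes (the Estimator normally handles all of this)
with tf.Graph().as_default():
    features, labels = batch_input_fn(dataset, train_indices, batch_size=4, seed=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())  # slice_input_producer keeps its epoch counter in a local variable
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        images_batch, labels_batch = sess.run([features['images'], labels])
        print("images:", images_batch.shape, " labels:", labels_batch)
        coord.request_stop()
        coord.join(threads)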

In [ ]:
if do_training:
    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook( tensors=tensors_to_log, every_n_secs=20 ) #every_n_iter=1000 )

    # Train the model
    epochs=200

    if False:  # Alternative : fit directly from in-memory arrays (train_data / train_labels are not defined in this notebook)
        cnn_classifier.fit(
          x=train_data,
          y=train_labels,
          batch_size=batch_size,
          steps=train_labels.shape[0] // batch_size * epochs,
          monitors=[logging_hook]
        )

    cnn_classifier.fit(
        input_fn=lambda: batch_input_fn(dataset, train_indices, batch_size=batch_size, 
                                        seed=tf_random_seed, num_epochs=epochs), 
        
        #input_fn=lambda: batch_input_fn(dataset, train_indices, batch_size=len(train_indices), 
        #                                seed=tf_random_seed, num_epochs=None), 
        #steps=epochs,
        
        #monitors=[logging_hook],
    )

In [ ]:
# Configure the accuracy metric for evaluation
cnn_metrics = {
  "accuracy":
      learn.MetricSpec(
          metric_fn=tf.metrics.accuracy, prediction_key="classes"),
}

# Evaluate the model and print results
#cnn_eval_results = cnn_classifier.evaluate( x=eval_data, y=eval_labels, metrics=cnn_metrics)

cnn_check_results = cnn_classifier.evaluate(
    input_fn=lambda: batch_input_fn(dataset, check_indices, batch_size=len(check_indices)), 
    steps=1,
    metrics=cnn_metrics,
)

print(cnn_check_results)
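
The single accuracy figure hides which digits get confused with which. A confusion matrix over the validation split makes that visible; this is a sketch that assumes scikit-learn is available (it is also used for the SVM further below):

In [ ]:
# Optional : confusion matrix over the validation ('check') split
from sklearn.metrics import confusion_matrix

check_predictions = list(cnn_classifier.predict(
    input_fn=lambda: batch_input_fn(dataset, check_indices, batch_size=len(check_indices)),
))

true_classes = [ int(dataset['label'][i]) for i in check_indices ]
pred_classes = [ int(p['classes']) for p in check_predictions ]
print(confusion_matrix(true_classes, pred_classes))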

... comment on results ...

Now let's look at some 'live examples'


In [ ]:
dataset_test = pickle.load(open(os.path.join('data', prefix+'-test.pkl'), 'rb'))

print("Ad-hoc test data loaded")

In [ ]:
def get_predictions_for_dataset( ds ):
    indices = range( len(ds['stamp']) )

    cnn_predictions_generator = cnn_classifier.predict( 
        input_fn=lambda: batch_input_fn(ds, indices, batch_size=1),
        #outputs=['probabilities'],
    )
    
    predictions = [p for p in cnn_predictions_generator]
    for i,p in enumerate(predictions):
        label = int(ds['label'][i])
        if label>=0:
            p['word'] = ds['words'][label]
        else:
            p['word'] = ds['words'][i]
        p['label'] = label
    
    return predictions

predictions = get_predictions_for_dataset(dataset_test)

print()
for i, prediction in enumerate(predictions):
    probs = ','.join([ "%6.2f%%" % (p*100,) for p in prediction['probabilities']] )
    print( "%s == %d  p=[%s]" % (dataset_test['words'][i], prediction['classes'],  probs,))

In [ ]:
def show_heat_map(heat_map, yticks=None):
    fig, ax = plt.subplots()
    ax.xaxis.tick_top()
    plt.imshow(heat_map, interpolation='nearest', cmap=plt.cm.Blues, aspect='auto')
    plt.xticks( range(10) )
    if yticks:
        plt.yticks( range(len(heat_map)), yticks )
    else:
        plt.yticks( range(len(heat_map)) )
    plt.show()

In [ ]:
# And a heat map...
heat_map = [ prediction['probabilities'] for prediction in predictions]

show_heat_map(heat_map)

Extra...

What happens if we run the 'animals' data through the 'num' network?


In [ ]:
dataset_animals = pickle.load(open(os.path.join('data', 'animals.pkl'), 'rb'))

predictions_animals = get_predictions_for_dataset(dataset_animals)

heat_map = [ p['probabilities'] for p in predictions_animals]

show_heat_map(heat_map, [ p['word'] for p in predictions_animals])

In [ ]:
heat_map = [ p['logits'] for p in predictions_animals]
show_heat_map(heat_map, [ p['word'] for p in predictions_animals])

In [ ]:
from sklearn import svm

animal_features, animal_targets=[],[]
for p in predictions_animals:
    #animal_features.append( p['probabilities'] )
    animal_features.append( p['logits'] )
    animal_targets.append( p['label'] )

animals_from_numbers_svm_classifier = svm.LinearSVC()
animals_from_numbers_svm_classifier.fit(animal_features, animal_targets) # learn from the data (QUICK!)
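
As a rough indication of whether the digit-CNN logits carry enough information to separate the animal words, we can print the SVM's accuracy on its own training data (a sketch; training accuracy only, so it is optimistic):

In [ ]:
# Rough sanity check : accuracy on the SVM's own training data (optimistic by construction)
print("SVM training accuracy : %.1f%%" % (
    100.0 * animals_from_numbers_svm_classifier.score(animal_features, animal_targets),))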

In [ ]:
dataset_animals_test = pickle.load(open(os.path.join('data', 'animals-test.pkl'), 'rb'))

predictions_animals_test = get_predictions_for_dataset(dataset_animals_test)

print('\n\nanimals class predictions from SVM classifier based on digits-CNN output')
for i,p in enumerate(predictions_animals_test):
    #svm_prediction = animals_from_numbers_svm_classifier.predict( p['probabilities'].reshape(1,-1) )
    svm_prediction = animals_from_numbers_svm_classifier.predict( p['logits'].reshape(1,-1) )
    #decision     = animals_from_numbers_svm_classifier.decision_function([ np_logits[0] ])
    
    print("Sound[%d] is '%s' - predicted class[%d] = '%s'" % (
            i, dataset_animals['words'][i], 
            svm_prediction, dataset_animals['words'][svm_prediction[0]],))

... close, but no cigar ...


In [ ]: