The most current version of this file is at :: https://github.com/mdda/cnn-speech-mnist
In [ ]:
The stamp process in the '_Data' notebook has produced some nice-looking spectrograms with a uniform (64, 32) shape.
Let's recognise the words the stamps represent by learning to differentiate between the 'stamp' images : a task for which the MNIST CNN is almost perfect...
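As a quick sanity check (a minimal sketch, assuming the '_Data' notebook has already written data/num.pkl with 'stamp', 'label' and 'words' entries, as used below), we can peek at one stamp to confirm the (64, 32) shape before building the network :
In [ ]:
# Optional sanity check : look at a single stamp produced by the '_Data' notebook
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

peek = pickle.load(open(os.path.join('data', 'num.pkl'), 'rb'))

stamp0 = np.array(peek['stamp'][0])
print("stamp0.shape:", stamp0.shape)                    # expecting (64, 32)
print("word:", peek['words'][int(peek['label'][0])])    # the word this stamp represents

plt.imshow(stamp0, cmap='gray', aspect='auto')
plt.show()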
In [ ]:
"""Convolutional Neural Network Estimator, built with tf.layers (originally for MNIST)."""
# FROM : https://www.tensorflow.org/tutorials/layers#building_the_cnn_mnist_classifier
# CODE : https://www.tensorflow.org/code/tensorflow/examples/tutorials/layers/cnn_mnist.py
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
from tensorflow.contrib.learn.python.learn.estimators import run_config
tf.logging.set_verbosity(tf.logging.INFO) # Quite a lot...
#tf.logging.set_verbosity(tf.logging.WARN) # This prevents Logging ...
do_training = True
In [ ]:
import sys
print(sys.version)
print('Tensorflow:',tf.__version__)
Expecting something like (the Python version is printed first, then the TensorFlow version):
3.5.2 (default, Sep 14 2016, 11:28:32)
[GCC 6.2.1 20160901 (Red Hat 6.2.1-1)]
Tensorflow: 1.0.0
In [ ]:
prefix='num'
In [ ]:
# Load training and validation data
dataset = pickle.load(open(os.path.join('data', prefix+'.pkl'), 'rb'))
train_indices = [ i for i,r in enumerate(dataset['rand']) if r<=0.9 ]
check_indices = [ i for i,r in enumerate(dataset['rand']) if r>0.9 ]
print("Training and Validation(='check_') data loaded, %d items total " % (len(dataset['stamp']),))
In [ ]:
num_of_classes = 10
In [ ]:
def cnn_model_fn(features, integer_labels, mode):
    """Model function for CNN."""
    features_images = features['images']

    input_layer = tf.reshape(features_images, [-1, 64, 32, 1], name='input_layer')

    # Convolutional Layer #1 (5x5 kernels)
    conv1 = tf.layers.conv2d(inputs=input_layer,
                             filters=16, kernel_size=[5, 5], padding="same",
                             activation=tf.nn.relu)

    # First max pooling layer with a 2x2 filter and stride of 2
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2 (5x5 kernels)
    conv2 = tf.layers.conv2d(inputs=pool1,
                             filters=16, kernel_size=[5, 5], padding="same",
                             activation=tf.nn.relu)

    # Pooling Layer #2 (2x2 filter and stride of 2)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten tensor into a batch of vectors
    pool2_flat = tf.contrib.layers.flatten(pool2)

    # Dense Layer
    dense = tf.layers.dense(inputs=pool2_flat, units=32, activation=tf.nn.relu)

    # Add dropout operation; each element is dropped with probability 0.5 during training
    dropout = tf.layers.dropout(inputs=dense, rate=0.5, training=(mode == learn.ModeKeys.TRAIN))

    # Logits layer
    logits = tf.layers.dense(inputs=dropout, units=num_of_classes)

    #logits = tf.Print(logits, [input_layer.get_shape(), integer_labels.get_shape()],
    #                  "Debug size information : ", first_n=1)

    loss = None
    train_op = None

    # Calculate Loss (for both TRAIN and EVAL modes)
    if mode != learn.ModeKeys.INFER:
        onehot_labels = tf.one_hot(indices=tf.cast(integer_labels, tf.int32), depth=num_of_classes)
        loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=onehot_labels)

    # Configure the Training Op (for TRAIN mode)
    if mode == learn.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(loss=loss,
                                                   global_step=tf.contrib.framework.get_global_step(),
                                                   learning_rate=0.001, optimizer="Adam")

    # Generate Predictions
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
        "logits": logits,
    }

    # Return a ModelFnOps object
    return model_fn_lib.ModelFnOps(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
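Before training, it is worth tracing the tensor shapes through the model above. A back-of-the-envelope sketch (pure Python, no TensorFlow needed), assuming the (64, 32, 1) input defined in cnn_model_fn :
In [ ]:
# Shape arithmetic for the CNN above : 'same' padding keeps H,W; each 2x2/stride-2 pool halves them
h, w, c = 64, 32, 1
h, w, c = h, w, 16       # conv1 : 5x5, padding='same', 16 filters -> (64, 32, 16)
h, w = h // 2, w // 2    # pool1 : 2x2, stride 2                   -> (32, 16, 16)
h, w, c = h, w, 16       # conv2 : 5x5, padding='same', 16 filters -> (32, 16, 16)
h, w = h // 2, w // 2    # pool2 : 2x2, stride 2                   -> (16,  8, 16)
flat = h * w * c
print("pool2 output : (%d, %d, %d), flattened to %d features" % (h, w, c, flat))
print("dense layers : %d -> 32 -> %d logits" % (flat, num_of_classes))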
In [ ]:
# Remove any previously saved model, so that training starts from scratch
! rm -rf cnn_model
In [ ]:
# Create the Estimator : https://www.tensorflow.org/extend/estimators
tf_random_seed = 100
config = run_config.RunConfig(tf_random_seed=tf_random_seed)

cnn_classifier = learn.Estimator(
    model_fn=cnn_model_fn,
    model_dir="cnn_model/" + prefix,  # This is relative to the ipynb
    config=config)
In [ ]:
def batch_input_fn(dataset, indices, batch_size=100, seed=None, num_epochs=1):
    # If seed is defined, this will shuffle data into batches

    # Get the data into tensorflow
    stamps = np.array(dataset['stamp'])[indices]
    print("stamps.shape:", stamps.shape)

    labels = np.array(dataset['label'])[indices]
    print("labels.shape:", labels.shape)

    # Ensure that the stamps are 'float32' in [0,1] and have channel=1
    stamps_with_channel = np.expand_dims(stamps / 255.0, -1)

    all_images = tf.constant(stamps_with_channel, shape=stamps_with_channel.shape, dtype=tf.float32)
    all_labels = tf.constant(labels, shape=labels.shape, verify_shape=True)

    print("batch_input_fn sizing : ", all_images.shape, )

    if True:  # This is if the number of examples is large enough to warrant batching...
        # Create a 'feeder' to batch up the data appropriately...
        image, label = tf.train.slice_input_producer([all_images, all_labels],
                                                      num_epochs=num_epochs,
                                                      shuffle=(seed is not None), seed=seed,
                                                      )

        dataset_dict = dict(images=image, labels=label)  # This becomes pluralized into batches by .batch()

        batch_dict = tf.train.batch(dataset_dict, batch_size,
                                    num_threads=1, capacity=batch_size * 2,
                                    enqueue_many=False, shapes=None, dynamic_pad=False,
                                    allow_smaller_final_batch=False,
                                    shared_name=None, name=None)

        batch_labels = batch_dict.pop('labels')

    if False:  # Alternative : feed the whole dataset as a single batch
        batch_dict = dict(images=all_images)
        batch_labels = all_labels

    # Return :
    #  1) a mapping of feature columns to Tensors with the corresponding feature data, and
    #  2) the corresponding labels
    return batch_dict, batch_labels

batch_size = 20
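Since batch_input_fn builds its pipeline from TF 1.x queue runners, the Estimator normally drives it for us. As an aside (not needed for training), a minimal sketch of how one could peek at a single batch outside the Estimator, assuming there are enough training items to fill it :
In [ ]:
# Peek at one batch produced by batch_input_fn (queue runners need explicit start/stop here)
with tf.Graph().as_default():
    features_dict, labels_t = batch_input_fn(dataset, train_indices, batch_size=4)
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        images, labels = sess.run([features_dict['images'], labels_t])
        print("one batch :", images.shape, labels)
        coord.request_stop()
        coord.join(threads)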
In [ ]:
if do_training:
    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_secs=20)  # every_n_iter=1000

    # Train the model
    epochs = 200

    if False:  # Disabled alternative : train directly from in-memory numpy arrays
        cnn_classifier.fit(
            x=train_data,
            y=train_labels,
            batch_size=batch_size,
            steps=train_labels.shape[0] / batch_size * epochs,
            monitors=[logging_hook]
        )

    cnn_classifier.fit(
        input_fn=lambda: batch_input_fn(dataset, train_indices, batch_size=batch_size,
                                        seed=tf_random_seed, num_epochs=epochs),
        #input_fn=lambda: batch_input_fn(dataset, train_indices, batch_size=len(train_indices),
        #                                seed=tf_random_seed, num_epochs=None),
        #steps=epochs,
        #monitors=[logging_hook],
    )
In [ ]:
# Configure the accuracy metric for evaluation
cnn_metrics = {
    "accuracy":
        learn.MetricSpec(
            metric_fn=tf.metrics.accuracy, prediction_key="classes"),
}

# Evaluate the model and print results
#cnn_eval_results = mnist_classifier.evaluate( x=eval_data, y=eval_labels, metrics=cnn_metrics)
cnn_check_results = cnn_classifier.evaluate(
    input_fn=lambda: batch_input_fn(dataset, check_indices, batch_size=len(check_indices)),
    steps=1,
    metrics=cnn_metrics,
)
print(cnn_check_results)
... comment on results ...
In [ ]:
dataset_test = pickle.load(open(os.path.join('data', prefix+'-test.pkl'), 'rb'))
print("Ad-hoc test data loaded")
In [ ]:
def get_predictions_for_dataset(ds):
    indices = range(len(ds['stamp']))

    cnn_predictions_generator = cnn_classifier.predict(
        input_fn=lambda: batch_input_fn(ds, indices, batch_size=1),
        #outputs=['probabilities'],
    )

    predictions = [p for p in cnn_predictions_generator]

    for i, p in enumerate(predictions):
        label = int(ds['label'][i])
        if label >= 0:
            p['word'] = ds['words'][label]
        else:
            p['word'] = ds['words'][i]
        p['label'] = label

    return predictions
predictions = get_predictions_for_dataset(dataset_test)
print()
for i, prediction in enumerate(predictions):
    probs = ','.join(["%6.2f%%" % (p * 100,) for p in prediction['probabilities']])
    print("%s == %d p=[%s]" % (dataset_test['words'][i], prediction['classes'], probs,))
In [ ]:
def show_heat_map(heat_map, yticks=None):
    fig, ax = plt.subplots()
    ax.xaxis.tick_top()
    plt.imshow(heat_map, interpolation='nearest', cmap=plt.cm.Blues, aspect='auto')
    plt.xticks(range(10))
    if yticks:
        plt.yticks(range(len(heat_map)), yticks)
    else:
        plt.yticks(range(len(heat_map)))
    plt.show()
In [ ]:
# And a heat map...
heat_map = [ prediction['probabilities'] for prediction in predictions]
show_heat_map(heat_map)
In [ ]:
dataset_animals = pickle.load(open(os.path.join('data', 'animals.pkl'), 'rb'))
predictions_animals = get_predictions_for_dataset(dataset_animals)
heat_map = [ p['probabilities'] for p in predictions_animals]
show_heat_map(heat_map, [ p['word'] for p in predictions_animals])
In [ ]:
heat_map = [ p['logits'] for p in predictions_animals]
show_heat_map(heat_map, [ p['word'] for p in predictions_animals])
In [ ]:
from sklearn import svm

animal_features, animal_targets = [], []
for p in predictions_animals:
    #animal_features.append( p['probabilities'] )
    animal_features.append(p['logits'])
    animal_targets.append(p['label'])

animals_from_numbers_svm_classifier = svm.LinearSVC()
animals_from_numbers_svm_classifier.fit(animal_features, animal_targets)  # learn from the data (QUICK!)
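A quick sanity check is to score the SVM back on its own training features : this only confirms that the animal classes are separable in the CNN's logit-space, not that the classifier generalises to new recordings.
In [ ]:
train_accuracy = animals_from_numbers_svm_classifier.score(animal_features, animal_targets)
print("SVM accuracy on its own training features : %.1f%%" % (train_accuracy * 100.0,))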
In [ ]:
dataset_animals_test = pickle.load(open(os.path.join('data', 'animals-test.pkl'), 'rb'))
predictions_animals_test = get_predictions_for_dataset(dataset_animals_test)

print('\n\nanimals class predictions from SVM classifier based on digits-CNN output')
for i, p in enumerate(predictions_animals_test):
    #svm_prediction = animals_from_numbers_svm_classifier.predict( p['probabilities'].reshape(1,-1) )
    svm_prediction = animals_from_numbers_svm_classifier.predict(p['logits'].reshape(1, -1))
    #decision = animals_from_numbers_svm_classifier.decision_function([ np_logits[0] ])
    print("Sound[%d] is '%s' - predicted class[%d] = '%s'" % (
        i, dataset_animals['words'][i],
        svm_prediction[0], dataset_animals['words'][svm_prediction[0]],))
... close, but no cigar ...
In [ ]: