In [1]:
%autosave 120
import numpy as np
np.random.seed(1337)
import datetime
import graphviz
from IPython.display import SVG
import keras
from keras import activations
from keras import backend as K
from keras.datasets import mnist
from keras.layers import (
    concatenate,
    Concatenate,
    Conv1D,
    Conv2D,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    Input,
    MaxPooling1D,
    MaxPooling2D)
from keras.models import load_model, Model, Sequential
from keras_tqdm import TQDMNotebookCallback
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
import math
import matplotlib
from matplotlib import gridspec
import matplotlib.pylab as plt
from matplotlib.ticker import NullFormatter, NullLocator, MultipleLocator
import pandas as pd
import random
from scipy import stats
import seaborn as sns
from sklearn.datasets import load_iris
import sklearn.ensemble
import sklearn.tree
from sklearn.metrics import (
    auc,
    confusion_matrix,
    roc_curve,
    precision_score)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import sqlite3
import sys
import talos as ta
import tensorflow as tf
from tensorflow.python.client.device_lib import list_local_devices
from tqdm import tqdm_notebook
import uuid
from vis.utils import utils
from vis.visualization import visualize_activation
from vis.visualization import visualize_saliency
import warnings
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)
sns.set_palette('husl')
sns.set(style='ticks')
warnings.filterwarnings("ignore")
print('Python version:', sys.version)
print('Matplotlib version:', matplotlib.__version__)
print('NumPy version:', np.__version__)
print('Keras version:', keras.__version__)
print('TensorFlow version:', tf.__version__)
list_local_devices()
Out[1]:
In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = [10, 10]
In [3]:
# input dimensions
img_x = 28
img_y = 28
In [4]:
# Load MNIST data into training and testing datasets. The x data are the features and the y data are the labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
In [5]:
# Reshape the data into a 4D tensor (sample_number, x_img_size, y_img_size, num_channels).
# MNIST is greyscale, which corresponds to a single channel/dimension.
# Alternatively, color, for example RGB, would correspond to three channels/dimensions.
x_train = x_train.reshape(x_train.shape[0], img_x, img_y, 1)
x_test = x_test.reshape(x_test.shape[0], img_x, img_y, 1)
input_shape = (img_x, img_y, 1)
# Cast the data as type float32.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train / 255
x_test = x_test / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
In [6]:
# Convert class vectors to binary class matrices for use in the categorical_crossentropy loss.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
In [7]:
x_train.shape
Out[7]:
In [8]:
input_shape
Out[8]:
In [9]:
y_train[0] # labels
Out[9]:
In [10]:
y_train.shape
Out[10]:
In [11]:
for i in x_train[0].tolist():
    print('\n', i)
In [12]:
plt.imshow(x_train[21].reshape(28, 28), cmap="Greys", interpolation="nearest");
In [13]:
model = Sequential()
model.add(Conv2D( # Add a 2D convolutional layer to process the 2D input (image) data.
    32,                        # number of output channels
    kernel_size = (3, 3),      # kernel: 3 x 3 moving window
    strides = (1, 1),          # kernel strides in the x and y dimensions -- default: (1, 1)
    activation = 'relu',       # activation function: ReLU
    input_shape = input_shape  # input size/shape
))
model.add(MaxPooling2D( # Add a 2D max pooling layer.
    pool_size = (2, 2), # size of the pooling in the x and y dimensions
    strides = (2, 2)    # strides in the x and y dimensions
))
# Add a convolutional layer. The input tensor for this layer is (batch_size, 13, 13, 32),
# where 13 x 13 are the spatial dimensions remaining after the first convolution and pooling
# and 32 is the number of output channels from the previous layer.
model.add(Conv2D(
    64,                  # number of output channels
    (5, 5),              # kernel: 5 x 5 moving window
    strides = (1, 1),    # kernel strides in x and y dimensions -- default: (1, 1)
    activation = 'relu'  # activation function: ReLU
))
model.add(Dropout(rate=0.5)) # Add a dropout layer.
model.add(MaxPooling2D( # Add a 2D max pooling layer.
    pool_size = (2, 2) # size of the pooling in the x and y dimensions
))
# Flatten the output from the convolutional layers to prepare it for input to the fully-connected layers.
model.add(Flatten())
model.add(Dense( # Specify a fully-connected layer.
    1000,               # number of nodes
    activation = 'relu' # activation function: ReLU
))
model.add(Dense( # Specify a fully-connected output layer.
    num_classes,            # number of classes
    activation = 'softmax', # softmax classification
    name = "preds"
))
#plot_model(model, to_file="model.png")
model.summary()
SVG(model_to_dot(model).create(prog='dot', format='svg'));
In [14]:
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'nadam',
    metrics = ['accuracy']
)
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath = 'best_model.{epoch:02d}-{val_loss:.2f}.h5',
    monitor = 'val_loss',
    save_best_only = True
)
In [15]:
%%time
out = model.fit(
    x_train,
    y_train,
    batch_size = 512,
    epochs = 5,
    verbose = True,
    validation_data = (x_test, y_test),
    callbacks = [checkpoint]
)
score = model.evaluate(x_test, y_test, verbose=False)
In [16]:
print('test loss:', score[0])
print('test accuracy:', score[1])
plt.plot(out.history['acc'], label='train')
plt.plot(out.history['val_acc'], label='validate')
plt.legend()
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.show();
Deep learning researchers often focus on visualizing the features learned by individual neurons using optimization-based algorithms. These optimization-based methods currently fall into two categories: activation maximization and code inversion.
In a convolutional neural network, each convolution layer has several learned template-matching filters that maximize their output when a similar template pattern is found in an input image. The first convolution layer is straightforward to visualize: because its filters are applied directly over raw input pixels, its weights can simply be displayed as images (a sketch of this follows below). Higher convolution filters operate on the outputs of lower convolution filters (which indicate the presence or absence of particular template patterns), making them harder to interpret.
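As a minimal sketch of that first-layer weight visualization (not part of the original notebook; it assumes the trained `model` from the cells above, whose first layer is the 32-channel, 3 x 3 Conv2D, and an arbitrary 4 x 8 grid layout):
In [ ]:
# Sketch: display the 32 learned 3 x 3 kernels of the first convolutional layer as images.
# Assumes `model` is the trained Sequential model defined above.
first_conv_weights = model.layers[0].get_weights()[0]  # shape: (3, 3, 1, 32)
fig, axes = plt.subplots(4, 8, figsize=(8, 4))         # 4 x 8 grid for the 32 filters
for idx, ax in enumerate(axes.flat):
    ax.imshow(first_conv_weights[:, :, 0, idx], cmap='Greys')
    ax.axis('off')
plt.show();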
The idea of activation maximization is to generate an input image that maximizes the filter output activations. This approach lets us see what sorts of input patterns activate a particular filter.
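At its core, activation maximization is gradient ascent on the input: compute the gradient of a chosen output activation with respect to the input image and repeatedly step the image in that direction. The following is a rough sketch of the idea using the Keras backend directly; it assumes the old Keras/TensorFlow 1 backend API (K.gradients, K.function). The cells below use the keras-vis implementation instead, after swapping the final softmax for a linear activation, which tends to give cleaner gradients.
In [ ]:
# Sketch: activation maximization for output node 0 by naive gradient ascent on the input.
# Assumes the Keras/TF1 backend functions K.gradients and K.function.
output_idx = 0                                      # target output node (digit 0)
layer_output = model.get_layer('preds').output
loss = K.mean(layer_output[:, output_idx])          # activation to maximize
grads = K.gradients(loss, model.input)[0]
grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)   # normalize the gradient
iterate = K.function([model.input], [loss, grads])
generated = np.random.random((1, img_x, img_y, 1))  # start from random noise
for _ in range(50):                                 # 50 ascent steps (arbitrary choice)
    loss_value, grads_value = iterate([generated])
    generated += grads_value
plt.imshow(generated[0, ..., 0]);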
In [17]:
layer_idx = utils.find_layer_idx(model, 'preds')
model.layers[layer_idx].activation = activations.linear # Swap softmax for linear.
model = utils.apply_modifications(model)
filter_idx = 0
img = visualize_activation(model, layer_idx, filter_indices=filter_idx, verbose=False)
im = plt.imshow(img[..., 0])
plt.colorbar(im, fraction=0.0458, pad=0.04);
In [18]:
layer_idx = utils.find_layer_idx(model, 'preds')
model.layers[layer_idx].activation = activations.linear # Swap softmax for linear.
model = utils.apply_modifications(model)
filter_idx = 4
img = visualize_activation(model, layer_idx, filter_indices=filter_idx, verbose=False)
im = plt.imshow(img[..., 0])
plt.colorbar(im, fraction=0.0458, pad=0.04);
Understanding how convolutional neural networks work is hard. A two-step method called the Salient Relevance (SR) map aims to illuminate how a deep convolutional neural network recognizes its inputs by identifying the attention areas within them. First, a layer-wise relevance propagation (LRP) step estimates a pixel-wise relevance map over the input. Then a context-aware saliency map is constructed from the LRP-generated map; it identifies areas close to the foci of attention rather than the isolated pixels that LRP reveals. This arguably corresponds to recognition in the human visual system, where information about regions matters more than information about individual pixels, and the original authors suggest that this saliency is something of a simulation of human visual recognition. The method identifies not only key pixels but also the attention areas that contribute to the network's comprehension of its inputs. Overall, the salient relevance map is a visual interface that unveils some of the visual attention of the network and reveals which types of objects the model has learned to recognize after training.
A saliency map highlights those input elements that are most important for classification of the input.
Given an input ${I_{o}}$ (e.g. an image), a class ${c}$, and a classification convolutional neural network with class score function ${S_{c}\left(I\right)}$ computed by the classification layer of the network, the features of ${I_{o}}$ can be ranked by their influence on the class score ${S_{c}\left(I_{o}\right)}$ in order to create a saliency map. Such a saliency map is specific to a particular input. One interpretation of this image-specific class saliency is that the magnitude of the class score derivative, ${\left|\partial S_{c}/\partial I\right|_{I_{o}}}$, indicates which input features require the least change to affect the class score the most.
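The simplest instance of this is a vanilla gradient saliency map: backpropagate the class score to the input and take the absolute value of the gradient at each pixel. A rough sketch follows; it assumes the Keras/TF1 backend API and the linear-activation 'preds' layer set up in the cells above. The cells below use keras-vis with guided backpropagation instead.
In [ ]:
# Sketch: vanilla gradient saliency for one test image and class 0.
# Assumes the model with the linear 'preds' activation from the cells above.
class_idx = 0
seed = x_test[13:14]                                    # one test image, shape (1, 28, 28, 1)
score = model.get_layer('preds').output[:, class_idx]  # class score S_c
input_grads = K.gradients(score, model.input)[0]        # dS_c / dI
compute_grads = K.function([model.input], [input_grads])
saliency = np.abs(compute_grads([seed])[0][0, ..., 0])  # per-pixel |gradient|
plt.imshow(saliency, cmap='jet');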
In [19]:
grads = visualize_saliency(model, layer_idx, filter_indices=filter_idx, seed_input=x_test[13], backprop_modifier='guided')
im = plt.imshow(grads, cmap='jet')
plt.colorbar(im, fraction=0.0458, pad=0.04);
In [20]:
# get indices in test dataset of instances of the class 0
y_test_non_categorical = np.argmax(y_test, axis=1)
indices = [i for i, j in enumerate(y_test_non_categorical) if j == 0]
# get the instances
x_test_0 = [x_test[i] for i in indices]
sample_size = 100
saliencies = []
for i in range(sample_size):
    saliencies.append(visualize_saliency(model, layer_idx, filter_indices=filter_idx, seed_input=x_test_0[i], backprop_modifier='guided'))
im = plt.imshow(np.mean(saliencies, axis=0), cmap='jet')
plt.colorbar(im, fraction=0.0458, pad=0.04);
In [21]:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.sem.html
im = plt.imshow(stats.sem(saliencies, axis=0), cmap='jet');
plt.colorbar(im, fraction=0.0458, pad=0.04);
In [22]:
mean = np.mean(saliencies, axis=0)
statistical_uncertainty = stats.sem(saliencies, axis=0)
x = list(range(len(mean.flatten())))
y = mean.flatten()
y_su = statistical_uncertainty.flatten()
plt.plot(x, y, 'k-')
plt.fill_between(x, y - 10*y_su/2, y + 10*y_su/2) # band of +/- 5 standard errors of the mean
plt.show();
In [23]:
plt.rcParams["figure.figsize"] = [10, 10]
xi = 0.2; yi = 0.2; wi = 0.7; hi = 0.7 # image
xc = 0.91; yc = 0.2; wc = 0.05; hc = 0.7 # colorbar
xh = 0.2; yh = 0.0; wh = 0.7; hh = 0.2 # horizontal plot
xv = 0.0; yv = 0.2; wv = 0.2; hv = 0.7 # vertical plot
ax_i = plt.axes((xi, yi, wi, hi))
ax_h = plt.axes((xh, yh, wh, hh))
ax_v = plt.axes((xv, yv, wv, hv))
ax_c = plt.axes((xc, yc, wc, hc))
ax_i.xaxis.set_major_formatter(NullFormatter())
ax_i.yaxis.set_major_formatter(NullFormatter())
ax_h.yaxis.set_major_formatter(NullFormatter())
ax_v.xaxis.set_major_formatter(NullFormatter())
plt.axes(ax_i)
plt.imshow(mean, aspect='auto', cmap="jet")
ax_h.plot(list(range(0, 28)), mean.sum(axis=0), '-k', drawstyle='steps')
ax_h.plot(list(range(0, 28)), mean.sum(axis=0) + np.sum(statistical_uncertainty, axis=0)/2, '-', drawstyle='steps', color='red')
ax_h.plot(list(range(0, 28)), mean.sum(axis=0) - np.sum(statistical_uncertainty, axis=0)/2, '-', drawstyle='steps', color='blue')
ax_v.plot(mean.sum(axis=1), list(range(0, 28)), '-k', drawstyle='steps')
ax_v.plot(mean.sum(axis=1) + np.sum(statistical_uncertainty, axis=1)/2, list(range(0, 28)), '-', drawstyle='steps', color='red')
ax_v.plot(mean.sum(axis=1) - np.sum(statistical_uncertainty, axis=1)/2, list(range(0, 28)), '-', drawstyle='steps', color='blue')
cb = plt.colorbar(cax=ax_c)
#cb.set_label('intensity')
#ax_i.set_title('input')
#ax_h.set_xlabel('${x}$')
#ax_h.set_ylabel('intensity')
#ax_h.yaxis.set_label_position('right')
#ax_v.set_ylabel('${y}$')
#ax_v.set_xlabel('intensity')
#ax_v.xaxis.set_label_position('top')
plt.show();
In [24]:
plt.rcParams["figure.figsize"] = [10, 10]
xi = 0.2; yi = 0.2; wi = 0.7; hi = 0.7 # image
xc = 0.91; yc = 0.2; wc = 0.05; hc = 0.7 # colorbar
xh = 0.2; yh = 0.0; wh = 0.7; hh = 0.2 # horizontal plot
xv = 0.0; yv = 0.2; wv = 0.2; hv = 0.7 # vertical plot
ax_i = plt.axes((xi, yi, wi, hi))
ax_h = plt.axes((xh, yh, wh, hh))
ax_v = plt.axes((xv, yv, wv, hv))
ax_c = plt.axes((xc, yc, wc, hc))
ax_i.xaxis.set_major_formatter(NullFormatter())
ax_i.yaxis.set_major_formatter(NullFormatter())
ax_h.yaxis.set_major_formatter(NullFormatter())
ax_v.xaxis.set_major_formatter(NullFormatter())
plt.axes(ax_i)
plt.imshow(mean, aspect='auto', cmap="jet")
ax_h.plot(list(range(0, 28)), mean.sum(axis=0), '-k', drawstyle='steps')
#ax_h.plot(list(range(0, 28)), mean.sum(axis=0) + np.sum(statistical_uncertainty, axis=0)/2, '-k', drawstyle='steps', color='red')
#ax_h.plot(list(range(0, 28)), mean.sum(axis=0) - np.sum(statistical_uncertainty, axis=0)/2, '-k', drawstyle='steps', color='blue')
ax_h.fill_between(
    list(range(0, 28)),
    mean.sum(axis=0) + np.sum(statistical_uncertainty, axis=0)/2,
    mean.sum(axis=0) - np.sum(statistical_uncertainty, axis=0)/2,
    step = 'pre',
    facecolor = 'red',
    alpha = 0.5
)
ax_h.set_xlim(-1, 27)
ax_v.plot(mean.sum(axis=1), list(range(0, 28)), '-k', drawstyle='steps')
#ax_v.plot(mean.sum(axis=1) + np.sum(statistical_uncertainty, axis=1)/2, list(range(0, 28)), '-k', drawstyle='steps', color='red')
#ax_v.plot(mean.sum(axis=1) - np.sum(statistical_uncertainty, axis=1)/2, list(range(0, 28)), '-k', drawstyle='steps', color='blue')
ax_v.fill_betweenx(
    list(range(1, 29)),
    mean.sum(axis=1) + np.sum(statistical_uncertainty, axis=1)/2,
    mean.sum(axis=1) - np.sum(statistical_uncertainty, axis=1)/2,
    step = 'pre',
    facecolor = 'red',
    alpha = 0.5
)
ax_v.set_ylim(0, 28)
cb = plt.colorbar(cax=ax_c)
#cb.set_label('intensity')
#ax_i.set_title('input')
#ax_h.set_xlabel('${x}$')
#ax_h.set_ylabel('intensity')
#ax_h.yaxis.set_label_position('right')
#ax_v.set_ylabel('${y}$')
#ax_v.set_xlabel('intensity')
#ax_v.xaxis.set_label_position('top')
plt.show();
In [ ]: