In [1]:
from __future__ import division, print_function
from keras.models import Sequential, load_model
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers.core import Dense, Dropout
from keras.utils import np_utils
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import os
%matplotlib inline
In [2]:
DATA_DIR = "../../data"
TRAIN_FILE = os.path.join(DATA_DIR, "mnist_train.csv")
TEST_FILE = os.path.join(DATA_DIR, "mnist_test.csv")
BATCH_SIZE = 128
NUM_CLASSES = 10
NUM_EPOCHS = 2
In [3]:
def parse_file(filename):
    # read the MNIST CSV: first column is the label, the rest are pixel values
    xdata, ydata = [], []
    fin = open(filename, "r")
    i = 0
    for line in fin:
        if i % 10000 == 0:
            print("{:s}: {:d} lines read".format(
                os.path.basename(filename), i))
        cols = line.strip().split(",")
        ydata.append(int(cols[0]))
        xdata.append([float(x) / 255. for x in cols[1:]])
        i += 1
    fin.close()
    print("{:s}: {:d} lines read".format(os.path.basename(filename), i))
    Y = np_utils.to_categorical(np.array(ydata), num_classes=NUM_CLASSES)
    X = np.array(xdata)
    return X, Y

Xtrain, Ytrain = parse_file(TRAIN_FILE)
Xtest, Ytest = parse_file(TEST_FILE)
print(Xtrain.shape, Ytrain.shape, Xtest.shape, Ytest.shape)
Xtrain, Ytrain = parse_file(TRAIN_FILE)
Xtest, Ytest = parse_file(TEST_FILE)
print(Xtrain.shape, Ytrain.shape, Xtest.shape, Ytest.shape)
The model is identical to the one defined in the Keras example mnist_mlp.py.
In [4]:
model = Sequential()
model.add(Dense(512, activation="relu", input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(10, activation="softmax"))
In [5]:
model.compile(optimizer="adam", loss="categorical_crossentropy",
metrics=["accuracy"])
In [6]:
model.summary()
In [7]:
for layer in model.layers:
    print(layer.name, layer.input.shape, layer.output.shape)
In [8]:
from keras import backend as K
from keras.callbacks import Callback
def calc_stats(W):
    # summarize a flattened weight array: L2 norm, mean, standard deviation
    return np.linalg.norm(W, 2), np.mean(W), np.std(W)

class MyDebugWeights(Callback):
    """Callback that records per-layer weight statistics at the end of each epoch."""

    def __init__(self):
        super(MyDebugWeights, self).__init__()
        self.weights = []
        self.tf_session = K.get_session()

    def on_epoch_end(self, epoch, logs=None):
        for layer in self.model.layers:
            name = layer.name
            for i, w in enumerate(layer.weights):
                w_value = w.eval(session=self.tf_session)
                w_norm, w_mean, w_std = calc_stats(np.reshape(w_value, -1))
                self.weights.append((epoch, "{:s}/W_{:d}".format(name, i),
                                     w_norm, w_mean, w_std))

    def on_train_end(self, logs=None):
        for e, k, n, m, s in self.weights:
            print("{:3d} {:20s} {:7.3f} {:7.3f} {:7.3f}".format(e, k, n, m, s))
In [22]:
my_debug_weights = MyDebugWeights()
history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE,
                    epochs=NUM_EPOCHS,
                    validation_split=0.1,
                    callbacks=[my_debug_weights])
# Train on 54000 samples, validate on 6000 samples
# Epoch 1/2
# 54000/54000 [==============================] - 4s - loss: 0.2830 - acc: 0.9146 - val_loss: 0.0979 - val_acc: 0.9718
# Epoch 2/2
# 54000/54000 [==============================] - 3s - loss: 0.1118 - acc: 0.9663 - val_loss: 0.0758 - val_acc: 0.9773
# 0 dense_1/W_0 28.236 -0.002 0.045
# 0 dense_1/W_1 0.283 0.003 0.012
# 0 dense_2/W_0 20.631 0.002 0.057
# 0 dense_2/W_1 0.205 0.008 0.010
# 0 dense_3/W_0 4.962 -0.005 0.098
# 0 dense_3/W_1 0.023 -0.001 0.007
# 1 dense_1/W_0 30.455 -0.003 0.048
# 1 dense_1/W_1 0.358 0.003 0.016
# 1 dense_2/W_0 21.989 0.002 0.061
# 1 dense_2/W_1 0.273 0.010 0.014
# 1 dense_3/W_0 5.282 -0.008 0.104
# 1 dense_3/W_1 0.040 -0.002 0.013
# Train on 54000 samples, validate on 6000 samples
# Epoch 1/2
# 54000/54000 [==============================] - 3s - loss: 0.0796 - acc: 0.9753 - val_loss: 0.0658 - val_acc: 0.9820
# Epoch 2/2
# 54000/54000 [==============================] - 3s - loss: 0.0607 - acc: 0.9803 - val_loss: 0.0745 - val_acc: 0.9803
# 0 dense_1/W_0 32.546 -0.004 0.051
# 0 dense_1/W_1 0.430 0.002 0.019
# 0 dense_2/W_0 23.191 0.001 0.064
# 0 dense_2/W_1 0.338 0.011 0.018
# 0 dense_3/W_0 5.535 -0.009 0.109
# 0 dense_3/W_1 0.060 -0.003 0.019
# 1 dense_1/W_0 34.445 -0.004 0.054
# 1 dense_1/W_1 0.490 0.001 0.022
# 1 dense_2/W_0 24.277 0.001 0.067
# 1 dense_2/W_1 0.420 0.012 0.023
# 1 dense_3/W_0 5.758 -0.011 0.113
# 1 dense_3/W_1 0.081 -0.004 0.025
# Train on 54000 samples, validate on 6000 samples
# Epoch 1/2
# 54000/54000 [==============================] - 3s - loss: 0.0473 - acc: 0.9854 - val_loss: 0.0686 - val_acc: 0.9807
# Epoch 2/2
# 54000/54000 [==============================] - 3s - loss: 0.0414 - acc: 0.9864 - val_loss: 0.0668 - val_acc: 0.9808
# 0 dense_1/W_0 36.141 -0.005 0.057
# 0 dense_1/W_1 0.551 0.000 0.024
# 0 dense_2/W_0 25.319 0.001 0.070
# 0 dense_2/W_1 0.497 0.014 0.028
# 0 dense_3/W_0 5.970 -0.012 0.117
# 0 dense_3/W_1 0.098 -0.004 0.031
# 1 dense_1/W_0 37.838 -0.005 0.060
# 1 dense_1/W_1 0.600 -0.001 0.026
# 1 dense_2/W_0 26.329 0.000 0.073
# 1 dense_2/W_1 0.567 0.014 0.033
# 1 dense_3/W_0 6.136 -0.014 0.121
# 1 dense_3/W_1 0.117 -0.005 0.037
# Train on 54000 samples, validate on 6000 samples
# Epoch 1/2
# 54000/54000 [==============================] - 3s - loss: 0.0354 - acc: 0.9876 - val_loss: 0.0619 - val_acc: 0.9837
# Epoch 2/2
# 54000/54000 [==============================] - 3s - loss: 0.0336 - acc: 0.9890 - val_loss: 0.0634 - val_acc: 0.9840
# 0 dense_1/W_0 39.366 -0.005 0.062
# 0 dense_1/W_1 0.638 -0.002 0.028
# 0 dense_2/W_0 27.335 -0.000 0.076
# 0 dense_2/W_1 0.643 0.014 0.038
# 0 dense_3/W_0 6.311 -0.015 0.124
# 0 dense_3/W_1 0.135 -0.005 0.043
# 1 dense_1/W_0 40.956 -0.006 0.064
# 1 dense_1/W_1 0.702 -0.003 0.031
# 1 dense_2/W_0 28.255 -0.001 0.078
# 1 dense_2/W_1 0.713 0.015 0.042
# 1 dense_3/W_0 6.442 -0.016 0.126
# 1 dense_3/W_1 0.150 -0.006 0.047
# Train on 54000 samples, validate on 6000 samples
# Epoch 1/2
# 54000/54000 [==============================] - 3s - loss: 0.0280 - acc: 0.9906 - val_loss: 0.0732 - val_acc: 0.9818
# Epoch 2/2
# 54000/54000 [==============================] - 3s - loss: 0.0265 - acc: 0.9909 - val_loss: 0.0727 - val_acc: 0.9812
# 0 dense_1/W_0 42.340 -0.006 0.067
# 0 dense_1/W_1 0.733 -0.005 0.032
# 0 dense_2/W_0 29.111 -0.001 0.080
# 0 dense_2/W_1 0.776 0.015 0.046
# 0 dense_3/W_0 6.574 -0.017 0.129
# 0 dense_3/W_1 0.161 -0.006 0.051
# 1 dense_1/W_0 43.770 -0.007 0.069
# 1 dense_1/W_1 0.781 -0.005 0.034
# 1 dense_2/W_0 30.020 -0.002 0.083
# 1 dense_2/W_1 0.874 0.016 0.052
# 1 dense_3/W_0 6.704 -0.018 0.131
# 1 dense_3/W_1 0.189 -0.007 0.059
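The callback stores one (epoch, name, norm, mean, std) tuple per weight tensor, so the logged statistics can be visualized as well as printed. A minimal sketch, assuming my_debug_weights has already been populated by model.fit (not run in this notebook):

import collections
norms = collections.defaultdict(list)
for epoch, name, w_norm, w_mean, w_std in my_debug_weights.weights:
    norms[name].append(w_norm)
for name, values in norms.items():
    # one curve of L2 norms per weight tensor, in logging order
    plt.plot(values, label=name)
plt.xlabel("logging step")
plt.ylabel("L2 norm")
plt.legend(loc="best")
plt.show()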
In [23]:
def get_outputs(inputs, model):
    # each K.function maps [input tensor, learning phase] to a list of layer outputs;
    # layers are: 0 Dense(512), 1 Dropout, 2 Dense(256), 3 Dropout, 4 Dense(10)
    layer_01_fn = K.function([model.layers[0].input, K.learning_phase()],
                             [model.layers[1].output])
    layer_23_fn = K.function([model.layers[2].input, K.learning_phase()],
                             [model.layers[3].output])
    layer_44_fn = K.function([model.layers[4].input, K.learning_phase()],
                             [model.layers[4].output])
    # learning phase 1 == training mode, so dropout is active
    layer_1_out = layer_01_fn([inputs, 1])[0]
    layer_3_out = layer_23_fn([layer_1_out, 1])[0]
    layer_4_out = layer_44_fn([layer_3_out, 1])[0]
    return layer_1_out, layer_3_out, layer_4_out

out_1, out_3, out_4 = get_outputs(Xtest[0:10], model)
out_1, out_3, out_4 = get_outputs(Xtest[0:10], model)
print("out_1", calc_stats(out_1))
print("out_3", calc_stats(out_3))
print("out_4", calc_stats(out_4))
# out_1 (15.320195, 0.15846619, 0.36553052)
# out_3 (31.983685, 0.52617866, 0.82984859)
# out_4 (1.4138139, 0.1, 0.29160777)
# out_1 (15.458527, 0.15253167, 0.38208964)
# out_3 (33.913242, 0.54224658, 0.90698332)
# out_4 (1.4142052, 0.1, 0.28973988)
# out_1 (16.639494, 0.15411146, 0.41647691)
# out_3 (35.837318, 0.58614647, 0.99438524)
# out_4 (1.4156684, 0.1, 0.29898632)
# out_1 (16.877953, 0.15098023, 0.43457347)
# out_3 (36.548904, 0.59088105, 1.0605338)
# out_4 (1.414073, 0.1, 0.29486296)
# out_1 (18.307556, 0.16563581, 0.47472247)
# out_3 (42.404495, 0.64846009, 1.242806)
# out_4 (1.4240878, 0.1, 0.29720506)
In [24]:
def get_gradients(inputs, labels, model):
    # symbolic gradients of the total loss w.r.t. every trainable weight tensor
    opt = model.optimizer
    loss = model.total_loss
    weights = model.weights
    grads = opt.get_gradients(loss, weights)
    grad_fn = K.function(inputs=[model.inputs[0],
                                 model.sample_weights[0],
                                 model.targets[0],
                                 K.learning_phase()],
                         outputs=grads)
    grad_values = grad_fn([inputs, np.ones(len(inputs)), labels, 1])
    return grad_values

gradients = get_gradients(Xtest[0:10], Ytest[0:10], model)
for i in range(len(gradients)):
    print("grad_{:d}".format(i), calc_stats(gradients[i]))
# grad_0 (1.7725379, 1.1711028e-05, 0.0028093776)
# grad_1 (0.17403033, 3.4195516e-05, 0.0076910509)
# grad_2 (1.2508092, -7.3888972e-05, 0.003460743)
# grad_3 (0.12154519, -0.00047613602, 0.0075816377)
# grad_4 (1.5319482, 4.8748915e-11, 0.030318365)
# grad_5 (0.10286356, -4.6566129e-11, 0.032528315)
# grad_0 (3.4017127, 8.7506611e-05, 0.0053710202)
# grad_1 (0.33252886, 0.00055375684, 0.014685402)
# grad_2 (1.9467239, -3.3674216e-05, 0.0053783408)
# grad_3 (0.16811177, -0.00019758131, 0.010505128)
# grad_4 (1.8920149, -3.4779077e-10, 0.037405979)
# grad_5 (0.11266962, -2.7939678e-10, 0.035629261)
# grad_0 (4.4856653, 0.00014608752, 0.0070793224)
# grad_1 (0.43840903, 0.00093970483, 0.019352324)
# grad_2 (2.4390073, 9.5780408e-05, 0.006736787)
# grad_3 (0.19859995, 0.00049467472, 0.012402636)
# grad_4 (2.9728518, -1.4736087e-10, 0.058762152)
# grad_5 (0.13749355, -6.9849196e-11, 0.043479279)
# grad_0 (0.94408065, 5.3343301e-06, 0.0014902415)
# grad_1 (0.092352077, 3.1091229e-05, 0.0040813056)
# grad_2 (0.57179779, -2.3590032e-05, 0.0015793034)
# grad_3 (0.043331128, -0.00013161075, 0.0027049957)
# grad_4 (0.63560385, 1.2043984e-10, 0.012562943)
# grad_5 (0.028290441, -1.7062121e-10, 0.0089462232)
# grad_0 (4.5891175, -7.2404553e-05, 0.0072430321)
# grad_1 (0.44867462, -0.00047407666, 0.019823136)
# grad_2 (2.6217206, 2.5737674e-05, 0.0072415713)
# grad_3 (0.18206903, 0.00012690801, 0.011378606)
# grad_4 (3.2452161, -2.9717739e-11, 0.064140067)
# grad_5 (0.12291637, -3.783498e-10, 0.038869571)
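A common follow-up is to compare each gradient's norm with the norm of the weight tensor it updates; ratios that stay very small or very large hint at vanishing or exploding updates. A minimal sketch reusing calc_stats and get_gradients (this ratio check is an added heuristic, not part of the original notebook):

sess = K.get_session()
gradients = get_gradients(Xtest[0:10], Ytest[0:10], model)
for w, g in zip(model.weights, gradients):
    # L2 norm of the current weight values and of the corresponding gradient
    w_norm = calc_stats(np.reshape(w.eval(session=sess), -1))[0]
    g_norm = calc_stats(np.reshape(g, -1))[0]
    print("{:20s} grad/weight norm ratio: {:.6f}".format(w.name, g_norm / w_norm))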