In [1]:
%%HTML
<style>
.container { width:100% }
</style>
In [2]:
import gzip
import pickle
import random
import numpy as np
import keras
from keras.layers import Dense
The following magic command is necessary to prevent the Python kernel from dying because of linkage problems with duplicate OpenMP runtime libraries.
In [3]:
%env KMP_DUPLICATE_LIB_OK=TRUE
The function $\texttt{vectorized_result}(d)$ takes a digit $d \in \{0,\cdots,9\}$ and returns a NumPy vector $\mathbf{x}$ of shape $(10,)$ such that $$ \mathbf{x}[i] = \left\{ \begin{array}{ll} 1 & \mbox{if $i = d$;} \\ 0 & \mbox{otherwise.} \end{array} \right. $$ This function is used to convert a digit $d$ into the expected output of a neural network that has one output unit for every digit.
In [4]:
def vectorized_result(d):
    # Encode the digit d as a one-hot vector of length 10.
    e = np.zeros((10, ), dtype=np.float32)
    e[d] = 1.0
    return e
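For example, $\texttt{vectorized_result}(3)$ returns a vector whose fourth component is $1$ and whose remaining components are $0$. The following added cell demonstrates this encoding:
In [ ]:
# Example: the digit 3 is encoded as a one-hot vector.
vectorized_result(3)  # array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.], dtype=float32)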
The function $\texttt{load_data}()$ reads the compressed MNIST data and returns a quadruple of the form $$ (\texttt{X_train}, \texttt{X_test}, \texttt{Y_train}, \texttt{Y_test}) $$ where
- $\texttt{X_train}$ is a matrix whose rows are the training images, each flattened into a vector of 784 pixel values,
- $\texttt{X_test}$ is the corresponding matrix of flattened test images,
- $\texttt{Y_train}$ and $\texttt{Y_test}$ contain the one-hot encoded digits of the training and test images, respectively.
In [5]:
def load_data():
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        train, validate, test = pickle.load(f, encoding="latin1")
    # Flatten every 28x28 image into a vector of 784 pixel values and
    # one-hot encode the labels; the validation set is not used here.
    X_train = np.array([np.reshape(x, (784, )) for x in train[0]])
    X_test  = np.array([np.reshape(x, (784, )) for x in test[0]])
    Y_train = np.array([vectorized_result(y) for y in train[1]])
    Y_test  = np.array([vectorized_result(y) for y in test[1]])
    return (X_train, X_test, Y_train, Y_test)
In [6]:
X_train, X_test, Y_train, Y_test = load_data()
Let us see what we have read:
In [7]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape
Out[7]:
((50000, 784), (10000, 784), (50000, 10), (10000, 10))
Below, we create a neural network with two hidden layers: the first hidden layer has 60 units and the second has 30 units, both with ReLU activation. The output layer has 10 units with softmax activation, one for every digit. The model is trained with the categorical cross-entropy loss using stochastic gradient descent with a learning rate of 0.3.
In [8]:
model = keras.models.Sequential()
model.add(Dense(60, activation='relu', input_dim=784))
model.add(Dense(30, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.SGD(lr=0.3),
              metrics=['accuracy'])
model.summary()
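The summary reports the number of trainable parameters of every layer. For a dense layer this number is $(\texttt{inputs} + 1) \cdot \texttt{units}$, where the $+1$ accounts for the bias. The following added cell checks the total by hand:
In [ ]:
# Sanity check of the parameter count reported by model.summary():
# every Dense layer has (inputs + 1) * units parameters, the +1 being the bias.
(784 + 1) * 60 + (60 + 1) * 30 + (30 + 1) * 10  # 47100 + 1830 + 310 = 49240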
In [9]:
%%time
history = model.fit(X_train, Y_train,
                    validation_data=(X_test, Y_test),
                    epochs=30, batch_size=100, verbose=1)
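The $\texttt{history}$ object returned by $\texttt{fit}$ records the loss and accuracy for every epoch. The following cell is a minimal sketch, not part of the original notebook, for plotting the learning curves with matplotlib; note that older Keras versions store the accuracy under the key 'acc', newer ones under 'accuracy'.
In [ ]:
import matplotlib.pyplot as plt

# Plot training vs. validation accuracy per epoch; the history keys are
# 'acc'/'val_acc' in older Keras versions, 'accuracy'/'val_accuracy' in newer ones.
plt.plot(history.history['acc'], label='training accuracy')
plt.plot(history.history['val_acc'], label='validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()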