MNIST dataset from: http://yann.lecun.com/exdb/mnist/
In [7]:
## Code from "Python Machine Learning" By Sebastian Raschka
import os
import struct
import numpy as np
def load_mnist(path, kind='train'):
    """Load MNIST images and labels from the IDX files under 'path'."""
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)
    with open(labels_path, 'rb') as lbpath:
        # IDX label header: magic number and item count (big-endian uint32s)
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(images_path, 'rb') as imgpath:
        # IDX image header: magic number, image count, rows, cols
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        # Flatten each 28x28 image into a 784-dimensional row vector
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
    return images, labels

X_train, y_train = load_mnist('/tmp/data/', 'train')
print("Number of training samples: %d \t Number of features: %d" % (X_train.shape[0], X_train.shape[1]))
X_test, y_test = load_mnist('/tmp/data/', 't10k')
print("Number of test samples: %d" % (X_test.shape[0]))
In [23]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots(figsize=(8, 4), nrows=2, ncols=5, sharex=True, sharey=True)
ax = ax.flatten()
for y in range(10):
    # Show the first training example of each digit class
    img = X_train[y_train == y][0].reshape(28, 28)
    ax[y].imshow(img, cmap='Greys', interpolation='nearest')
ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
plt.show()
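The grid above shows one example per digit class. To see within-class variation instead, the same plotting pattern works for repeated samples of a single digit (a sketch; the digit choice is arbitrary):
In [ ]:
# Sketch: first 10 training images of the digit 7, same plotting pattern.
fig, ax = plt.subplots(figsize=(8, 4), nrows=2, ncols=5, sharex=True, sharey=True)
ax = ax.flatten()
sevens = X_train[y_train == 7]
for i in range(10):
    ax[i].imshow(sevens[i].reshape(28, 28), cmap='Greys', interpolation='nearest')
ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
plt.show()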
In [ ]:
### Adapted from "Python Machine Learning" by Sebastian Raschka
import numpy as np
from scipy.special import expit
import sys
class NeuralNetwork(object):
    def __init__(self, n_output, n_features, n_hidden,
                 epochs=500, eta=0.001, alpha=0.0, random_seed=None):
        np.random.seed(random_seed)
        self.n_output = n_output
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.W1, self.W2 = self._initialize_weights()
        self.epochs = epochs
        self.eta = eta      # learning rate
        self.alpha = alpha  # momentum coefficient
    def _encode_labels(self, y, k):
        # One-hot encode integer labels into a (k, n_samples) matrix
        onehot = np.zeros(shape=(k, y.shape[0]))
        for idx, val in enumerate(y):
            onehot[val, idx] = 1.0
        return onehot
    def _initialize_weights(self):
        # Uniform random init; the +1 accounts for the bias unit in each layer
        W1 = np.random.uniform(-1, 1, size=(self.n_hidden, self.n_features + 1))
        W2 = np.random.uniform(-1, 1, size=(self.n_output, self.n_hidden + 1))
        return (W1, W2)
    def _sigmoid(self, z):
        return expit(z)
    def _tanh(self, z):
        e_pos_z = np.exp(z)
        e_neg_z = np.exp(-z)
        return (e_pos_z - e_neg_z) / (e_pos_z + e_neg_z)
    def _sigmoid_gradient(self, z):
        # d/dz sigmoid(z) = sigmoid(z) * (1 - sigmoid(z))
        sigval = self._sigmoid(z)
        return sigval * (1 - sigval)
    def _tanh_gradient(self, z):
        # d/dz tanh(z) = 1 - tanh(z)**2
        tanhval = self._tanh(z)
        return 1 - tanhval**2
    def _add_bias_unit(self, X, how='column'):
        # Append a column of ones (one per sample) or prepend a row of ones
        if how == 'column':
            X_new = np.hstack((X, np.ones((X.shape[0], 1))))
        elif how == 'row':
            X_new = np.vstack((np.ones((1, X.shape[1])), X))
        else:
            raise AttributeError("Argument 'how' must be 'column' or 'row'")
        return X_new
    def _layer_forward_linear(self, X, W):
        # X has samples in rows; W has shape (n_units, n_inputs)
        z = W.dot(X.T)
        activ = z  # identity activation
        return X, z, activ
    def _layer_forward_sigmoid(self, X, W):
        z = W.dot(X.T)
        activ = self._sigmoid(z)
        return X, z, activ
    def _layer_forward_tanh(self, X, W):
        z = W.dot(X.T)
        activ = self._tanh(z)
        return X, z, activ
    def _feed_forward_full(self, X, W1, W2):
        # Input layer: add bias column -> (n_samples, n_features + 1)
        X_w_bias = self._add_bias_unit(X, how='column')
        # Hidden layer: z1 and a1 have shape (n_hidden, n_samples)
        X1, z1, a1 = self._layer_forward_sigmoid(X_w_bias, W1)
        # Add bias row to hidden activations -> (n_hidden + 1, n_samples)
        a1_w_bias = self._add_bias_unit(a1, how='row')
        # Output layer: transpose so the helper sees samples in rows again
        X2, z2, a2 = self._layer_forward_sigmoid(a1_w_bias.T, W2)
        return X_w_bias, z1, a1_w_bias, z2, a2
    def _get_cost(self, y_true, y_pred, W1, W2):
        """Cross-entropy cost: sum of -y*ln(ypred) - (1-y)*ln(1-ypred)"""
        term1 = -y_true * np.log(y_pred)
        term2 = -(1 - y_true) * np.log(1 - y_pred)
        cost = np.sum(term1 + term2)
        return cost
    def _layer_backward(self, Y, W):
        gradW1, gradW2 = sel
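The class above is cut off before the backpropagation step, but the methods defined so far are enough to run a forward pass. A minimal smoke test, assuming the MNIST arrays loaded earlier and an arbitrary hidden-layer size of 50:
In [ ]:
# Sketch: instantiate the network and push a few samples forward to
# check the shapes flowing through each layer (layer sizes are arbitrary).
nn = NeuralNetwork(n_output=10, n_features=784, n_hidden=50, random_seed=1)
X_in, z1, a1, z2, a2 = nn._feed_forward_full(X_train[:5], nn.W1, nn.W2)
print(X_in.shape)   # (5, 785): 784 pixels plus a bias column
print(a1.shape)     # (51, 5): 50 hidden activations plus a bias row
print(a2.shape)     # (10, 5): one output activation per class
y_enc = nn._encode_labels(y_train[:5], 10)
print(y_enc.shape)  # (10, 5): one-hot columns, one per sample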