In [1]:
import numpy as np
import csv
import os
import random
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` is passed straight to ``np.exp``, so scalars are handled directly
    and arrays are processed element-wise.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
def sigmoid_prime(z):
    """Return the derivative of the logistic function evaluated at ``z``.

    Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
    """
    activation = sigmoid(z)
    return activation * (1 - activation)
class FullyConnectedNN:
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent on a
    quadratic cost; gradients are computed by backpropagation.

    Attributes are plain lists with one entry per non-input layer:
      * ``W[k]`` has shape ``(sizes[k + 1], sizes[k])``
      * ``B[k]`` has shape ``(sizes[k + 1], 1)``

    NOTE(review): because biases are column vectors, inputs and targets are
    presumably expected to be column vectors of shape ``(n, 1)`` as well --
    confirm against the data-loading code.
    """

    def __init__(self, sizes):
        """Initialize fully connected NN using the list size.

        ``sizes`` holds the number of units in each layer, input layer first.
        Weights and biases are drawn from a standard normal distribution.
        """
        self.sizes = sizes
        self.W = [np.random.randn(n_out, n_in)
                  for n_in, n_out in zip(sizes[:-1], sizes[1:])]
        self.B = [np.random.randn(n_out, 1) for n_out in sizes[1:]]

    def feedforward(self, x):
        """Return the network output for input ``x``."""
        for w, b in zip(self.W, self.B):
            x = sigmoid(np.dot(w, x) + b)
        return x

    def cost(self, output, y):
        """Return the element-wise quadratic cost ``0.5 * (output - y)**2``."""
        return 0.5*(output - y)*(output - y)

    def cost_derivative(self, output, y):
        """Return the derivative of the quadratic cost w.r.t. ``output``."""
        return (output-y)

    def evaluate(self, test_data):
        """Return the fraction of ``test_data`` samples classified correctly.

        Each sample is indexed as ``(input, label)``; a prediction is the
        index of the largest output activation and counts as correct when it
        equals ``label``.  An empty ``test_data`` yields ``0.0`` rather than
        a division by zero.
        """
        num_test = len(test_data)
        if num_test == 0:
            return 0.0
        num_correct = sum(
            int(np.argmax(self.feedforward(sample[0])) == sample[1])
            for sample in test_data)
        return num_correct / float(num_test)

    def backprop(self, trn_sample):
        """Return ``(nabla_b, nabla_w)`` for a single training sample.

        ``trn_sample`` is indexed as ``(input, target)``.  The two returned
        lists hold the gradients of the cost with respect to ``B`` and ``W``
        and have the same layout as those attributes.
        """
        nabla_b = [np.zeros(b.shape) for b in self.B]
        nabla_w = [np.zeros(w.shape) for w in self.W]

        # Forward pass: remember every weighted input z and every activation.
        activation = trn_sample[0]
        activations = [activation]
        zs = []
        for b, w in zip(self.B, self.W):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: start with the error of the output layer ...
        delta = self.cost_derivative(activations[-1], trn_sample[1]) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # ... then propagate it backwards; ``l`` counts layers from the end.
        for l in range(2, len(self.sizes)):
            sp = sigmoid_prime(zs[-l])
            delta = np.dot(self.W[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def _update_mini_batch(self, batch, eta):
        """Apply one gradient step using the mean gradient over ``batch``."""
        nabla_b = [np.zeros(b.shape) for b in self.B]
        nabla_w = [np.zeros(w.shape) for w in self.W]
        for sample in batch:
            dnabla_b, dnabla_w = self.backprop(sample)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, dnabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, dnabla_w)]
        # Average over the samples actually present, so a shorter final
        # batch is not under-weighted.
        step = eta / float(len(batch))
        self.B = [b - step*nb for b, nb in zip(self.B, nabla_b)]
        self.W = [w - step*nw for w, nw in zip(self.W, nabla_w)]

    def train(self, trn_data, nEpoch, mini_size,
              eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Parameters:
          trn_data  -- mutable sequence of ``(input, target)`` samples; it is
                       shuffled IN PLACE at the start of every epoch.
          nEpoch    -- number of passes over ``trn_data``.
          mini_size -- number of samples per mini-batch (positive integer).
                       The last batch of an epoch may be smaller when the
                       data size is not a multiple of ``mini_size``.
          eta       -- learning rate.
          test_data -- optional sequence of ``(input, label)`` samples.  When
                       given and non-empty, accuracy is printed every epoch,
                       or roughly ten times in total when ``nEpoch > 20``.

        Raises:
          ValueError -- if ``mini_size`` is not positive.
        """
        if mini_size <= 0:
            raise ValueError("mini_size must be a positive integer")

        num_sample = len(trn_data)
        # Explicit None/length test: plain truthiness is ambiguous for
        # array-like containers.
        report = test_data is not None and len(test_data) > 0
        # Floor division keeps the reporting interval an integer regardless
        # of the interpreter's division semantics.
        report_every = nEpoch // 10 if nEpoch > 20 else 1

        for e in range(nEpoch):
            random.shuffle(trn_data)
            for start in range(0, num_sample, mini_size):
                self._update_mini_batch(
                    trn_data[start:start + mini_size], eta)
            # Evaluate on test set
            if report and e % report_every == 0:
                # Single pre-formatted argument so the text is the same
                # whether print is a statement or a function.
                print("Epoch %s Accuracy:  %s"
                      % (e, self.evaluate(test_data)))