In [8]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.html.widgets import interact
from sklearn.datasets import load_digits
digits = load_digits()
In [9]:
"""This part defines the sigmoid functions, which ae used to calculate the
output of each neuron. At large positive and negative x, it approximates the step
function, but curves near zero."""
def sigmoid(x):
    return 1/(1 + np.exp(-x))
sigmoid_v = np.vectorize(sigmoid)
def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))
sigmoidprime_v = np.vectorize(sigmoidprime)
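Since np.exp already works elementwise, sigmoid and sigmoidprime accept whole NumPy arrays without np.vectorize; the vectorized wrappers above are kept only because the rest of the notebook refers to them. A quick sketch of the direct call (the array z is a made-up example):

z = np.array([-2.0, 0.0, 2.0])
#sigmoid broadcasts over the array; no vectorize needed
print(sigmoid(z))
#the derivative peaks at 0.25 where z == 0
print(sigmoidprime(z))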
In [10]:
x = np.linspace(-15, 15, 100)
y = sigmoid(x)
plt.plot(x, y)
plt.ylim(-0.3, 1.3)
Out[10]:
In [11]:
"""This section created random initial weights and biases. It also takes the
data and answers from the source, and converts the answers into 10-d vectors
ex. 6 --> [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"""
size = [64, 20, 10]
weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)
biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)
trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02
#convert the integer answers into a 10-dimension array
traininganswervectors = np.zeros((1796,10))
for n in range(1796):
    traininganswervectors[n][digits.target[n]] = 1
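A more compact way to build the same one-hot vectors, shown here as a sketch for the 1200 training targets actually used below (np.eye(10) supplies the ten unit vectors, and indexing it with the integer targets picks the matching rows; onehot is a hypothetical name):

onehot = np.eye(10)[digits.target[0:1200]]
#onehot[n] is the 10-d vector for digit digits.target[n], e.g. 6 --> a row with a 1 in position 6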
In [12]:
"""This function calculates the output of the network based on the weights and
biases, and uses the sigmoid function."""
def feedforward(weights, biases, a):
    b = []
    #first element is inputs "a"
    b.append(a)
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            b[n][n2] = sigmoid_v(np.dot(weights[n-1][n2], b[n-1]) + biases[n-1][n2])
    return b
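Because weights[n-1] has shape (size[n], size[n-1]), the inner loop over neurons can be replaced with one matrix-vector product per layer. A sketch of an equivalent version (feedforward_vec is a hypothetical name, and it relies on the same global size list):

def feedforward_vec(weights, biases, a):
    b = [a]
    for n in range(1, len(size)):
        #whole-layer product instead of a per-neuron loop
        b.append(sigmoid(np.dot(weights[n-1], b[n-1]) + biases[n-1]))
    return b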
In [13]:
feedforward(weights, biases, trainingdata[0])
Out[13]:
In [14]:
"""This function just calculates the difference between the answer the network
provides and the given answer for an input for each element of the answer"""
def costderivative(output, answers):
    return (output - answers)
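This is the gradient of the quadratic cost C = 1/2 * ||output - answers||^2, since dC/d(output_j) = output_j - answers_j. A quick finite-difference sanity check with made-up values:

out = np.array([0.8, 0.1])
ans = np.array([1.0, 0.0])
h = 1e-6
C = lambda o: 0.5 * np.sum((o - ans)**2)
#the numerical slope in the first coordinate should be close to costderivative(out, ans)[0] = -0.2
print((C(out + np.array([h, 0.0])) - C(out)) / h, costderivative(out, ans)[0])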
In [15]:
"""This function is used to find the 'minimum' of the cost derivative. It selects
a 'minibatch' of random inputs from the training set to approximate the behavior
of the whole set."""
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (input, result) based on random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        for n3 in range(batchsize):
            weights, biases = train(weights, biases, minibatch, lc)
        results = []
        for n4 in range(len(inputs)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
    return weights, biases
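Note that train already loops over every (input, result) pair in the minibatch and averages the gradients, so the n3 loop applies the same minibatch batchsize times per epoch, which effectively scales the step size by batchsize. If that is not intended, the n3 loop can be replaced by a single call:

weights, biases = train(weights, biases, minibatch, lc)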
In [16]:
"""This function adjusts the weights and biases based on the results of the
backpropagation function"""
def train(weights, biases, minibatch, lc):
    #initialize the nabla gradients as zero arrays with the same shapes as the biases/weights
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    weights = [w-(lc/len(minibatch))*n_w for w, n_w in zip(weights, nw)]
    biases = [b-(lc/len(minibatch))*n_b for b, n_b in zip(biases, nb)]
    return weights, biases
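In symbols, with m = len(minibatch) and learning rate lc, the update above is w -> w - (lc/m) * (sum of the weight gradients over the minibatch), and likewise for the biases: a step against the average gradient of the minibatch.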
In [17]:
"""This function is the most complex, and likely where I made an error. It
calculates the gradient of the change for the output, then calculates it for
each step before it (hence BACKpropagation)."""
def backprop(weights, biases, inputs, answers):
    #initialize the nabla gradients as zero arrays with the same shapes as the biases/weights
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    a = inputs
    alist = [inputs]
    zlist = []
    #from feedforward
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        zlist.append(np.zeros(size[n]))
        alist.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            zlist[n-1][n2] = np.dot(weights[n-1][n2], alist[n-1]) + biases[n-1][n2]
            alist[n][n2] = sigmoid_v(zlist[n-1][n2])
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    #different from MN, alist[-2] not same size as delta?
    nw[-1] = np.dot(delta, alist[-1].transpose())
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1].transpose(), delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        #same here
        nw[-n] = np.dot(delta, alist[-n].transpose())
    return nb, nw
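A likely answer to the question in the comments above: in Michael Nielsen's code, delta and the activations are column vectors (2-D arrays), so np.dot(delta, activations[-2].transpose()) is an outer product whose shape matches the weight matrix. With the 1-D arrays used here, np.dot(delta, alist[-1].transpose()) collapses to a scalar instead. A sketch of the outer-product equivalent, changing only the nw lines (the shapes then match weights[-1] and weights[-n]):

nw[-1] = np.outer(delta, alist[-2])
#and inside the backward loop
nw[-n] = np.outer(delta, alist[-n-1])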
In [18]:
"""This function is just used to test the accuracy of each epoch. It converts the
outputs into a vector like the answers, taking the largest value to be the '1'
ex [0.21, 0.06, 0.134, 0.952, 0.558, 0.031, 0.511, 0.105, 0.216, 0.041] -->
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0] == 3"""
def accuracy(inputs, results, answers):
    correct = 0
    binresults = results
    for n in range(0, len(results)):
        #converts the output into a binary y/n for each digit
        for n2 in range(len(results[n])):
            if results[n][n2] == np.amax(results[n]):
                binresults[n][n2] = 1
            else:
                binresults[n][n2] = 0
        if np.array_equal(answers[n], binresults[n]):
            correct += 1
    return correct / len(results)
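An essentially equivalent but shorter check compares the index of the largest output with the index of the 1 in the answer vector (accuracy_argmax is a hypothetical name; ties are resolved by taking the first maximum):

def accuracy_argmax(inputs, results, answers):
    correct = 0
    for r, a in zip(results, answers):
        #the predicted digit is the index of the largest output
        if np.argmax(r) == np.argmax(a):
            correct += 1
    return correct / len(results)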
In [19]:
size = [64, 20, 10]
weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)
biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)
trainingdata = digits.data[0:500]
traininganswers = digits.target[0:500]
traininganswervectors = np.zeros((500,10))
for n in range(500):
    traininganswervectors[n][digits.target[n]] = 1
In [20]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
traininganswervectors, 5, 1, 100)
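Since only the first 500 digits are used for training, the remaining samples can serve as a rough held-out check of the trained network; a sketch, reusing feedforward and accuracy and picking samples 500-999 arbitrarily:

testdata = digits.data[500:1000]
testanswervectors = np.eye(10)[digits.target[500:1000]]
testresults = [feedforward(final_weights, final_biases, d)[-1] for d in testdata]
print(accuracy(testdata, testresults, testanswervectors))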