In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact  #IPython.html.widgets was moved to the ipywidgets package
from sklearn.datasets import load_digits
digits = load_digits()
In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)
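A quick sanity check one could add here: compare sigmoidprime against a centered finite-difference estimate of the derivative of sigmoid (the grid of test points and the 1e-6 tolerance are arbitrary choices, not part of the original notebook).
In [ ]:
#compare the analytic derivative to a centered finite difference
xs = np.linspace(-5, 5, 11)
h = 1e-5
numeric = (sigmoid_v(xs + h) - sigmoid_v(xs - h)) / (2 * h)
print(np.allclose(sigmoidprime_v(xs), numeric, atol=1e-6))  #expect True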
In [3]:
size = [64, 20, 10]
weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)
biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)
trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02
#convert the integer answers into 10-dimensional one-hot vectors
traininganswervectors = np.zeros((1200, 10))
for n in range(1200):
    traininganswervectors[n][traininganswers[n]] = 1
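A quick check that the one-hot encoding lines up with the raw labels (assuming the cell above has run): the 1 should sit at index traininganswers[0].
In [ ]:
print(traininganswers[0])
print(traininganswervectors[0])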
In [4]:
def feedforward(weights, biases, a):
    b = []
    #first element is the input vector "a"
    b.append(a)
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons in layer n
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            b[n][n2] = sigmoid_v(np.dot(weights[n-1][n2], b[n-1]) + biases[n-1][n2])
    return b
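The per-neuron loop above can also be written as one matrix-vector product per layer; a minimal sketch of an equivalent vectorized version (feedforward_vectorized is a new name, not part of the original code):
In [ ]:
def feedforward_vectorized(weights, biases, a):
    #same result as feedforward, but one matrix-vector product per layer
    b = [a]
    for w, bias in zip(weights, biases):
        b.append(sigmoid_v(np.dot(w, b[-1]) + bias))
    return b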
In [5]:
#note: costderivative is defined in a later cell (In [25]); run that cell first
opt = feedforward(weights, biases, trainingdata[0])
print(opt[-1])
print(traininganswervectors[0])
print(costderivative(opt[-1], traininganswervectors[0]))
In [6]:
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (input, result) based on the random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        #one gradient-descent update per minibatch
        weights, biases = train(weights, biases, minibatch, lc)
        results = []
        for n4 in range(len(inputs)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
    return weights, biases
In [7]:
def train(weights, biases, minibatch, lc):
    #nabla arrays start at zero, with the same shapes as the parameters
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a + b for a, b in zip(nb, dnb)]
        nw = [a + b for a, b in zip(nw, dnw)]
    weights = [w - (lc / len(minibatch)) * n_w for w, n_w in zip(weights, nw)]
    biases = [b - (lc / len(minibatch)) * n_b for b, n_b in zip(biases, nb)]
    return weights, biases
In [24]:
def backprop(weights, biases, inputs, answers):
    #nabla arrays start at zero, with the same shapes as the parameters
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    alist = [inputs]
    zlist = []
    #forward pass (from feedforward), storing the weighted inputs z and activations a per layer
    for n in range(1, len(size)):
        zlist.append(np.zeros(size[n]))
        alist.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            zlist[n-1][n2] = np.dot(weights[n-1][n2], alist[n-1]) + biases[n-1][n2]
            alist[n][n2] = sigmoid_v(zlist[n-1][n2])
    #backward pass: error at the output layer, then propagated back layer by layer
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    #delta and alist[-2] are 1-D arrays here, so the weight gradient is an outer product
    nw[-1] = np.outer(delta, alist[-2])
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1].transpose(), delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        nw[-n] = np.outer(delta, alist[-n-1])
    return nb, nw
In [25]:
def costderivative(output, answers):
    #gradient of the quadratic cost 0.5*||output - answers||^2 with respect to the output
    return (output - answers)
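Since costderivative is the gradient of the quadratic cost 0.5*||output - answers||^2, backprop can be checked numerically; the sketch below (quadratic_cost, the choice of weight [0][0][0], and the 1e-4 tolerance are assumptions, not part of the original) perturbs one weight and compares the finite-difference slope to the gradient returned by backprop.
In [ ]:
#gradient check: perturb a single weight and compare the finite-difference
#slope of the quadratic cost against the corresponding entry of nw
def quadratic_cost(weights, biases, x, y):
    out = feedforward(weights, biases, x)[-1]
    return 0.5 * np.sum((out - y) ** 2)

x, y = trainingdata[0], traininganswervectors[0]
nb, nw = backprop(weights, biases, x, y)
eps = 1e-5
w_plus = [w.copy() for w in weights]
w_minus = [w.copy() for w in weights]
w_plus[0][0][0] += eps
w_minus[0][0][0] -= eps
numeric = (quadratic_cost(w_plus, biases, x, y)
           - quadratic_cost(w_minus, biases, x, y)) / (2 * eps)
print(abs(numeric - nw[0][0][0]) < 1e-4)  #expect True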
In [26]:
def accuracy(inputs, results, answers):
    correct = 0
    for n in range(0, len(results)):
        #convert the output into a one-hot y/n for each digit, without modifying results in place
        binresult = np.zeros(len(results[n]))
        binresult[np.argmax(results[n])] = 1
        if np.array_equal(answers[n], binresult):
            correct += 1
    return correct / len(results)
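Because the answers are one-hot vectors, comparing the binarized output to the answer is the same as comparing argmax indices; an equivalent shorter form (accuracy_argmax is a new helper name, not from the original code):
In [ ]:
def accuracy_argmax(inputs, results, answers):
    #predicted digit = index of the largest output; compare to the answer's index
    return np.mean([np.argmax(r) == np.argmax(a) for r, a in zip(results, answers)])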
In [27]:
size = [64, 20, 10]
weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)
biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)
trainingdata = digits.data[0:500]
traininganswers = digits.target[0:500]
traininganswervectors = np.zeros((500, 10))
for n in range(500):
    traininganswervectors[n][traininganswers[n]] = 1
In [28]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
traininganswervectors, 5, 1, 100)
print(final_weights)
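The run above trains and scores on the same 500 digits, so a held-out check is a natural follow-up; a minimal sketch, assuming the 500:1000 slice of the digits data as a test set (this slice and the reuse of accuracy are choices, not from the original notebook):
In [ ]:
#evaluate the trained network on digits it never saw during training
testdata = digits.data[500:1000]
testanswervectors = np.zeros((500, 10))
for n in range(500):
    testanswervectors[n][digits.target[500 + n]] = 1
testresults = [feedforward(final_weights, final_biases, x)[-1] for x in testdata]
print("held-out accuracy:", accuracy(testdata, testresults, testanswervectors))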