In [371]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact
from sklearn.datasets import load_digits
digits = load_digits()
In [372]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))
sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))
sigmoidprime_v = np.vectorize(sigmoidprime)
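As a quick sanity check that is not part of the original session, the analytic derivative can be compared against a central finite-difference estimate (the step 1e-5 and the tolerance are arbitrary choices):
In [ ]:
#added check: sigmoidprime should agree with a finite-difference estimate of sigmoid's slope
xs = np.linspace(-5, 5, 11)
numeric = (sigmoid_v(xs + 1e-5) - sigmoid_v(xs - 1e-5)) / 2e-5
print(np.allclose(sigmoidprime_v(xs), numeric, atol=1e-6))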
In [373]:
size = [64, 20, 10]
weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)
biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)
trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02
#convert the integer answers into one-hot vectors of length 10
traininganswervectors = np.zeros((len(trainingdata), 10))
for n in range(len(trainingdata)):
    traininganswervectors[n][traininganswers[n]] = 1
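A small added check (not in the original notebook) that the parameter arrays and the one-hot target vectors have the expected shapes for size = [64, 20, 10]:
In [ ]:
#weights should be (64, 20) and (20, 10); biases (20,) and (10,)
print([w.shape for w in weights], [b.shape for b in biases])
#each answer vector should have a single 1 at the position of the digit
print(traininganswers[0], traininganswervectors[0])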
In [374]:
def feedforward(a, weights, biases):
    b = []
    #first element is the input activation "a"
    b.append(a)
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons in that layer
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            b[n][n2] = sigmoid_v(np.dot(weights[n-1][:, n2], b[n-1]) + biases[n-1][n2])
    return b
In [375]:
feedforward(trainingdata[0], weights, biases)
Out[375]:
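feedforward returns one activation array per layer. An added cell (illustrative, not from the original run) to look at just the shapes and the digit the untrained network currently picks:
In [ ]:
#activations per layer: input (64,), hidden (20,), output (10,)
activations = feedforward(trainingdata[0], weights, biases)
print([a.shape for a in activations])
print("current prediction:", np.argmax(activations[-1]), "true digit:", traininganswers[0])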
In [376]:
def GradientDescent(inputs, answers, weights, biases, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        #create tuples (input, answer) based on the random locations
        minibatch = []
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        #one training step per minibatch (train already loops over its contents)
        train(minibatch, weights, biases, lc)
        #evaluate on the full training set after each epoch
        results = []
        for n4 in range(len(inputs)):
            results.append(feedforward(inputs[n4], weights, biases)[-1])
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
In [397]:
def train(minibatch, weights, biases, lc):
    #accumulate gradients, starting from zero arrays with the same shapes as the parameters
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    #update in place so the caller's weights and biases actually change
    #(rebinding the names here would only affect the local variables)
    for k in range(len(weights)):
        weights[k] -= (lc/len(minibatch)) * nw[k]
        biases[k] -= (lc/len(minibatch)) * nb[k]
In [378]:
def backprop(inputs, answers):
    #gradients start as zero arrays with the same shapes as the parameters
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #forward pass, keeping the weighted inputs (zlist) and activations (alist) of every layer
    alist = [inputs]
    zlist = []
    for n in range(1, len(size)):
        z = np.dot(alist[n-1], weights[n-1]) + biases[n-1]
        zlist.append(z)
        alist.append(sigmoid_v(z))
    #error at the output layer
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    nw[-1] = np.outer(alist[-2], delta)
    #propagate the error backwards through the hidden layers
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1], delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        nw[-n] = np.outer(alist[-n-1], delta)
    return (nb, nw)
In [379]:
def costderivative(output, answers):
    #derivative of the quadratic cost 1/2*||output - answers||^2 with respect to the output
    return (output - answers)
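Backpropagation is easy to get subtly wrong, so a finite-difference check of a single weight's gradient is worth a look. This is an added sketch assuming the quadratic cost 1/2*||output - answers||^2 implied by costderivative; the weight index (1, 0, 0) and step size are arbitrary choices:
In [ ]:
#added sketch: compare one backprop gradient against a finite-difference estimate
def quadcost(i, r):
    out = feedforward(i, weights, biases)[-1]
    return 0.5 * np.sum((out - r) ** 2)
nb, nw = backprop(trainingdata[0], traininganswervectors[0])
eps = 1e-5
old = weights[1][0, 0]
weights[1][0, 0] = old + eps
cplus = quadcost(trainingdata[0], traininganswervectors[0])
weights[1][0, 0] = old - eps
cminus = quadcost(trainingdata[0], traininganswervectors[0])
weights[1][0, 0] = old
print(nw[1][0, 0], (cplus - cminus) / (2 * eps))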
In [380]:
def accuracy(inputs, results, answers):
    correct = 0
    for n in range(len(results)):
        #the network's answer is the digit with the largest output;
        #it is correct when that matches the position of the 1 in the one-hot answer
        if np.argmax(results[n]) == np.argmax(answers[n]):
            correct += 1
    return correct / len(results)
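A tiny illustration (added, with made-up outputs) of how accuracy scores results: the first fake output peaks at the correct digit, the second does not, so the expected score is 0.5. The inputs argument is unused, so None is passed here.
In [ ]:
#made-up outputs: the first peaks at digit 1 (correct), the second at digit 0 (wrong)
fake_results = [np.array([0.1, 0.9, 0.2, 0, 0, 0, 0, 0, 0, 0]),
                np.array([0.8, 0.1, 0.2, 0, 0, 0, 0, 0, 0, 0])]
fake_answers = [np.eye(10)[1], np.eye(10)[1]]
print(accuracy(None, fake_results, fake_answers))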
In [381]:
trainingdata = digits.data[0:100]
traininganswers = digits.target[0:100]
traininganswervectors = np.zeros((100, 10))
for n in range(100):
    traininganswervectors[n][digits.target[n]] = 1
In [398]:
GradientDescent(trainingdata, traininganswervectors, weights, biases, 5, 0.1, 10)
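As a rough, added gauge of generalisation (not in the original notebook), the samples beyond the training slice can serve as an informal held-out set; digits.data[1200:1300] is an arbitrary choice here:
In [ ]:
#informal held-out check on digits the network was never trained on
testdata = digits.data[1200:1300]
testtargets = digits.target[1200:1300]
testvectors = np.eye(10)[testtargets]
testresults = [feedforward(x, weights, biases)[-1] for x in testdata]
print("held-out accuracy:", accuracy(testdata, testresults, testvectors))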