In [19]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.html.widgets import interact

from sklearn.datasets import load_digits
digits = load_digits()

In [20]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)

In [21]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02

#convert the integer answers into a 10-dimension array
traininganswervectors = np.zeros((1796,10))
for n in range(1796):
    traininganswervectors[n][digits.target[n]] = 1

In [22]:
def feedforward(weights, biases, a):
    b = []
    #first element is inputs "a"
    b.append(a)
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            b[n][n2] = sigmoid_v(np.dot(weights[n-1][0:,n2], b[n-1]) + biases[n-1][n2])
      
    return b

In [23]:
feedforward(weights, biases, trainingdata[0])


Out[23]:
[array([  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.,   0.,   0.,  13.,
         15.,  10.,  15.,   5.,   0.,   0.,   3.,  15.,   2.,   0.,  11.,
          8.,   0.,   0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.,   0.,
          5.,   8.,   0.,   0.,   9.,   8.,   0.,   0.,   4.,  11.,   0.,
          1.,  12.,   7.,   0.,   0.,   2.,  14.,   5.,  10.,  12.,   0.,
          0.,   0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]),
 array([  1.99787812e-32,   9.99824398e-01,   1.41219218e-09,
          2.20180198e-02,   9.90589962e-01,   1.35933990e-07,
          2.64432937e-16,   9.99380312e-01,   1.00000000e+00,
          9.98982691e-01,   1.32704889e-16,   1.85051187e-14,
          8.03537746e-01,   9.95948920e-01,   1.07452260e-03,
          1.00000000e+00,   1.00000000e+00,   7.60908756e-19,
          3.70650629e-10,   9.99999990e-01]),
 array([ 0.54519876,  0.87862537,  0.82055818,  0.10474378,  0.90570466,
         0.17556315,  0.74042021,  0.10678127,  0.07082343,  0.7105419 ])]

In [68]:
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (inputs, result) based on random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        for n3 in range(batchsize):
            weights, biases = train(weights, biases, minibatch, lc)
        
        
        results = []
        for n4 in range(len(trainingdata)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])
            
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
        
    return weights, biases

In [69]:
def train(weights, biases, minibatch, lc):
    #set the nabla functions to be the functions themselves initially, same size
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    
    weights = [w-(lc/len(minibatch))*n_w for w, n_w in zip(weights, nw)]
    biases = [b-(lc/len(minibatch))*n_b for b, n_b in zip(biases, nb)]
    return weights, biases

In [ ]:


In [108]:
def backprop(weights, biases, inputs, answers):
    #set the nabla functions to be the same size as functions
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    a = inputs
    alist = [inputs]
    zlist = []
    #from feedforward
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        zlist.append(np.zeros(size[n]))
        alist.append(np.zeros(size[n]))
        for n2 in range(1, size[n]):
            zlist[n-1][n2] = np.dot(weights[n-1][0:,n2], alist[n-1]) + biases[n-1][n2]
            alist[n][n2] = sigmoid_v(alist[n-1][n2])
    
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    #different from MN, alist[-2] not same size as delta?
    nw[-1] = np.dot(delta, alist[-1].transpose())
    
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1], delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        #same here
        nw[-n] = np.dot(delta, alist[-n].transpose())
    
    return nb, nw

In [109]:
def costderivative(output, answers):
    return (output - answers)

In [110]:
def accuracy(inputs, results, answers):
    correct = 0
    binresults = results
    for n in range(0, len(results)):
        #converts the output into a binary y/n for each digit
        for n2 in range(len(results[n])):
            if results[n][n2] == np.amax(results[n]):
                binresults[n][n2] = 1
            else:
                binresults[n][n2] = 0
        
        if np.array_equal(answers[n], binresults[n]):
            correct += 1
    return correct / len(results)

In [111]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:500]
traininganswers = digits.target[0:500]

traininganswervectors = np.zeros((500,10))
for n in range(500):
    traininganswervectors[n][digits.target[n]] = 1

In [112]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
                                              traininganswervectors, 5, 1, 30)

print(final_weights)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-112-9561e5ad64d1> in <module>()
      1 final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
----> 2                                               traininganswervectors, 5, 1, 30)
      3 
      4 print(final_weights)

<ipython-input-68-1fc706aefe9d> in gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs)
      8             minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
      9         for n3 in range(batchsize):
---> 10             weights, biases = train(weights, biases, minibatch, lc)
     11 
     12 

<ipython-input-69-9425d8ff6f22> in train(weights, biases, minibatch, lc)
      5     #largely taken from Michael Nielsen's implementation
      6     for i, r in minibatch:
----> 7         dnb, dnw = backprop(weights, biases, i, r)
      8         nb = [a+b for a, b in zip(nb, dnb)]
      9         nw = [a+b for a, b in zip(nw, dnw)]

<ipython-input-108-18e67c6d177d> in backprop(weights, biases, inputs, answers)
      7     zlist = []
      8     for b, w in zip(biases, weights):
----> 9         z = np.dot(w, a)+b
     10         zlist.append(z)
     11         a = sigmoid_v(z)

ValueError: shapes (64,20) and (64,) not aligned: 20 (dim 1) != 64 (dim 0)

In [ ]:


In [ ]: