In [19]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact

from sklearn.datasets import load_digits
digits = load_digits()
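
A quick look at the data, assuming the standard scikit-learn digits layout (1797 samples, each an 8x8 image flattened into 64 grey-level values in the range 0-16):

In [ ]:
print(digits.data.shape, digits.target.shape)
#show the first digit as an image, with its label as the title
plt.matshow(digits.images[0], cmap='gray')
plt.title("label: %d" % digits.target[0])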

In [20]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)
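
np.exp already works elementwise on NumPy arrays, so sigmoid and sigmoidprime accept whole vectors even without np.vectorize; the _v wrappers just make the elementwise use explicit. A quick sanity check, comparing sigmoidprime against a central-difference estimate:

In [ ]:
x = np.linspace(-5, 5, 11)
print(sigmoid(x))   #works directly on an array
h = 1e-6
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
print(np.allclose(sigmoidprime_v(x), numeric))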

In [21]:
size = [64, 20, 10]   #64 input pixels, one hidden layer of 20 neurons, 10 output digits

weights = []
for n in range(1, len(size)):
    #weight matrix for each layer, uniform in [-1, 1), shape (this layer, previous layer)
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    #bias vector for each layer, uniform in [-1, 1)
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02

#convert the integer labels into 10-dimensional one-hot vectors, one row per training example
traininganswervectors = np.zeros((len(trainingdata), 10))
for n in range(len(trainingdata)):
    traininganswervectors[n][traininganswers[n]] = 1
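
The loop above builds the one-hot vectors one element at a time; an equivalent vectorized construction indexes an identity matrix by the labels:

In [ ]:
onehot = np.eye(10)[traininganswers]
print(np.array_equal(onehot, traininganswervectors))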

In [22]:
def feedforward(weights, biases, a):
    #list of activations, layer by layer; the first entry is the input "a"
    activations = [a]
    for w, b in zip(weights, biases):
        #each layer's activations are sigmoid(w . a_prev + b), computed for the whole layer at once
        activations.append(sigmoid_v(np.dot(w, activations[-1]) + b))
    return activations

In [23]:
opt = feedforward(weights, biases, trainingdata[0])
print(opt[-1])
print(traininganswervectors[0])
print(costderivative(opt[-1], traininganswervectors[0]))


[ 0.95268903  0.15219509  0.37781353  0.0787356   0.89811514  0.20636925
  0.40752871  0.4777203   0.96000983  0.14310585]
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[-0.04731097  0.15219509  0.37781353  0.0787356   0.89811514  0.20636925
  0.40752871  0.4777203   0.96000983  0.14310585]

In [24]:
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (input, answer) based on the random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        #apply the same minibatch batchsize times, i.e. batchsize gradient steps per epoch
        for n3 in range(batchsize):
            weights, biases = train(weights, biases, minibatch, lc)
        
        #measure training accuracy after this epoch
        results = []
        for n4 in range(len(inputs)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])
            
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
        
    return weights, biases
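
gradient_descent above draws one random minibatch per epoch and applies it batchsize times. A more conventional epoch visits every training example once, in shuffled non-overlapping minibatches; a sketch of just that batching step, assuming the same (input, answer) pairing used above:

In [ ]:
def shuffled_minibatches(inputs, answers, batchsize):
    #shuffle the indices, then slice them into consecutive non-overlapping batches
    order = np.random.permutation(len(inputs))
    for start in range(0, len(inputs), batchsize):
        idx = order[start:start + batchsize]
        yield [(inputs[i], answers[i]) for i in idx]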

In [25]:
def train(weights, biases, minibatch, lc):
    #gradient accumulators: zero arrays with the same shapes as the biases and weights
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    
    #one gradient-descent step, averaging the gradients over the minibatch
    weights = [w-(lc/len(minibatch))*n_w for w, n_w in zip(weights, nw)]
    biases = [b-(lc/len(minibatch))*n_b for b, n_b in zip(biases, nb)]
    return weights, biases
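
train performs one gradient-descent step per call: every weight and bias moves by lc/len(minibatch) times the summed per-example gradients, i.e. the learning rate times the average gradient. A tiny numeric illustration of that update rule, using made-up gradients:

In [ ]:
w_demo = np.array([1.0, -2.0])
grads_demo = [np.array([0.2, 0.4]), np.array([0.6, 0.0])]   #gradients from two examples
lc_demo = 0.5
print(w_demo - (lc_demo / len(grads_demo)) * sum(grads_demo))   #[ 0.8 -2.1]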

In [31]:
def backprop(weights, biases, inputs, answers):
    #gradient accumulators: zero arrays with the same shapes as the biases and weights
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    a = inputs
    alist = [inputs]
    zlist = []
    for b, w in zip(biases, weights):
            z = np.dot(w, a)+b
            zlist.append(z)
            a = sigmoid_v(z)
            alist.append(a)
    
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    print("delta", delta)
    print("alist", alist)
    #different from MN, alist[-2] not same size as delta?
    nw[-1] = np.dot(delta, alist[-2].transpose())
    
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1].transpose(), delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        #same here
        nw[-n] = np.dot(delta, alist[-n-1].transpose())
    
    return nb, nw
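
The two nw assignments above need matrices shaped (neurons in this layer, neurons in the previous layer), e.g. (10, 20) for the output layer. With the flat 1-D arrays used in this notebook, np.dot of delta (length 10) and alist[-2] (length 20) cannot produce that, whereas an outer product can:

In [ ]:
delta_demo = np.zeros(10)
a_demo = np.zeros(20)
print(np.outer(delta_demo, a_demo).shape)   #(10, 20), the shape of weights[-1]
#np.dot(delta_demo, a_demo) raises "shapes (10,) and (20,) not aligned"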

In [32]:
def costderivative(output, answers):
    return (output - answers)
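
costderivative is the gradient of the quadratic cost C = 1/2 * ||output - answers||^2 with respect to the output activations. A minimal sketch of the cost itself, under that convention (the training loop only ever needs the derivative):

In [ ]:
def cost(output, answers):
    #quadratic cost for a single example; its gradient is costderivative above
    return 0.5 * np.sum((output - answers) ** 2)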

In [33]:
def accuracy(inputs, results, answers):
    correct = 0
    #copy the outputs so the caller's results are not overwritten in place
    binresults = [np.array(r) for r in results]
    for n in range(0, len(results)):
        #converts the output into a binary y/n for each digit
        for n2 in range(len(results[n])):
            if results[n][n2] == np.amax(results[n]):
                binresults[n][n2] = 1
            else:
                binresults[n][n2] = 0
        
        if np.array_equal(answers[n], binresults[n]):
            correct += 1
    return correct / len(results)
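
An equivalent check (up to ties) compares the position of the largest output with the position of the 1 in the one-hot answer:

In [ ]:
def accuracy_argmax(results, answers):
    #fraction of examples where the predicted digit matches the label
    return np.mean([np.argmax(r) == np.argmax(a) for r, a in zip(results, answers)])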

In [34]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:500]
traininganswers = digits.target[0:500]

traininganswervectors = np.zeros((500,10))
for n in range(500):
    traininganswervectors[n][digits.target[n]] = 1

In [35]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
                                              traininganswervectors, 5, 1, 100)

print(final_weights)


delta [ 0.07675723  0.02780815  0.0093905   0.13425283  0.03234364  0.14556868
  0.00592747  0.07448789 -0.09589887  0.00622013]
alist [array([  0.,   0.,   5.,  14.,  15.,   2.,   0.,   0.,   0.,   0.,  13.,
        14.,   9.,  10.,   0.,   0.,   0.,   0.,  15.,   8.,   2.,  15.,
         3.,   0.,   0.,   0.,  11.,  12.,   9.,  14.,   2.,   0.,   0.,
         0.,   7.,  16.,  14.,   2.,   0.,   0.,   0.,   0.,  13.,  14.,
        16.,   4.,   0.,   0.,   0.,   3.,  15.,   8.,  14.,  10.,   0.,
         0.,   0.,   0.,   6.,  16.,  16.,   8.,   0.,   0.]), array([  8.71512058e-16,   1.34421193e-03,   1.00000000e+00,
         1.41393057e-02,   1.00000000e+00,   1.00000000e+00,
         1.32874316e-07,   9.99999962e-01,   1.28709646e-20,
         1.00000000e+00,   1.00000000e+00,   6.37924075e-01,
         9.99895559e-01,   1.00000000e+00,   1.00000000e+00,
         9.99999999e-01,   1.00000000e+00,   9.99996665e-01,
         4.54852532e-03,   4.59835385e-04]), array([ 0.90661599,  0.18468104,  0.10227581,  0.77846156,  0.96528839,
        0.61449925,  0.08027991,  0.91006188,  0.60031596,  0.08232965])]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-35-659cabab3c73> in <module>()
      1 final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
----> 2                                               traininganswervectors, 5, 1, 100)
      3 
      4 print(final_weights)

<ipython-input-24-1fc706aefe9d> in gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs)
      8             minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
      9         for n3 in range(batchsize):
---> 10             weights, biases = train(weights, biases, minibatch, lc)
     11 
     12 

<ipython-input-25-9425d8ff6f22> in train(weights, biases, minibatch, lc)
      5     #largely taken from Michael Nielsen's implementation
      6     for i, r in minibatch:
----> 7         dnb, dnw = backprop(weights, biases, i, r)
      8         nb = [a+b for a, b in zip(nb, dnb)]
      9         nw = [a+b for a, b in zip(nw, dnw)]

<ipython-input-31-06514cc2ae65> in backprop(weights, biases, inputs, answers)
     18     print("alist", alist)
     19     #different from MN, alist[-2] not same size as delta?
---> 20     nw[-1] = np.dot(delta, alist[-2].transpose())
     21 
     22     for n in range(2, len(size)):

ValueError: shapes (10,) and (20,) not aligned: 10 (dim 0) != 20 (dim 0)
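
The ValueError confirms the question in the backprop comment: delta and alist[-2] are flat 1-D arrays of different lengths (10 and 20), so np.dot cannot combine them. In Michael Nielsen's implementation the activations are (n, 1) column vectors, so np.dot(delta, activation.transpose()) is effectively an outer product; with the 1-D arrays used in this notebook, np.outer is the equivalent. A corrected sketch of the backprop cell above, with the debug prints removed:

In [ ]:
def backprop(weights, biases, inputs, answers):
    #gradient accumulators: zero arrays with the same shapes as the biases and weights
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #forward pass, keeping every weighted input z and activation a
    a = inputs
    alist = [inputs]
    zlist = []
    for b, w in zip(biases, weights):
        z = np.dot(w, a) + b
        zlist.append(z)
        a = sigmoid_v(z)
        alist.append(a)
    #output layer: delta is the derivative of the cost with respect to z
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    #outer product gives the (this layer, previous layer) weight-gradient matrix
    nw[-1] = np.outer(delta, alist[-2])
    #propagate delta backwards through the hidden layers
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1].transpose(), delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        nw[-n] = np.outer(delta, alist[-n-1])
    return nb, nw

With this version in place, the gradient_descent call above should run end to end; the per-epoch accuracy it prints will depend on the random initialization, the minibatch size, and the learning rate passed in.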
