In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact

from sklearn.datasets import load_digits
digits = load_digits()


In [2]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)
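Since np.exp already operates elementwise, the np.vectorize wrappers are not strictly needed; sigmoid and sigmoidprime accept whole arrays as written. A quick, self-contained sanity check of the derivative against a central finite difference:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

z = np.array([-2.0, 0.0, 2.0])
eps = 1e-6
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
analytic = sigmoid(z) * (1 - sigmoid(z))
print(np.allclose(numeric, analytic))   # True
print(sigmoid(z))                       # elementwise, no np.vectorize needed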

In [3]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02

#convert the integer answers into 10-dimensional one-hot vectors, one per training example
traininganswervectors = np.zeros((len(trainingdata), 10))
for n in range(len(trainingdata)):
    traininganswervectors[n][traininganswers[n]] = 1
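The one-hot conversion can also be checked (or written more compactly) by indexing an identity matrix with the integer labels; a small illustration with made-up labels:

import numpy as np

labels = np.array([0, 3, 9, 3])          # hypothetical digit labels
one_hot = np.eye(10)[labels]             # row n has a 1 at position labels[n]
print(one_hot[1])                        # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]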

In [4]:
def feedforward(weights, biases, a):
    b = []
    #first element is the input activation "a"
    b.append(a)
    for n in range(1, len(size)):
        #each later layer holds size[n] activations
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            #weighted input to neuron n2 of layer n, squashed by the sigmoid
            b[n][n2] = sigmoid(np.dot(weights[n-1][n2], b[n-1]) + biases[n-1][n2])

    return b
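Because each weight matrix already holds one row per neuron, the inner loop over neurons can be collapsed into a single matrix-vector product per layer. A minimal vectorized sketch, assuming the same [64, 20, 10] layout; it should produce the same activations as the loop above:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def feedforward_vec(weights, biases, a):
    activations = [a]
    for w, b in zip(weights, biases):
        activations.append(sigmoid(np.dot(w, activations[-1]) + b))
    return activations

#shape check with random parameters for a [64, 20, 10] network
rng = np.random.default_rng(0)
ws = [rng.uniform(-1, 1, (20, 64)), rng.uniform(-1, 1, (10, 20))]
bs = [rng.uniform(-1, 1, 20), rng.uniform(-1, 1, 10)]
layers = feedforward_vec(ws, bs, rng.uniform(0, 16, 64))
print([layer.shape for layer in layers])   # [(64,), (20,), (10,)]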

In [5]:
opt = feedforward(weights, biases, trainingdata[0])
print(opt[-1])
print(traininganswervectors[0])
print(costderivative(opt[-1], traininganswervectors[0]))


[ 0.49907837  0.76151195  0.0382227   0.17966434  0.52998773  0.68615384
  0.27139576  0.64373806  0.50529917  0.81352014]
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-e5821dc7ffd5> in <module>()
      2 print(opt[-1])
      3 print(traininganswervectors[0])
----> 4 print(costderivative(opt[-1], traininganswervectors[0]))

NameError: name 'costderivative' is not defined
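The NameError is an ordering problem: costderivative is not defined until In [25] further down, so this cell fails when the notebook is run top to bottom. Defining the quadratic-cost derivative before this check avoids it; a minimal sketch, matching the later definition:

import numpy as np

def costderivative(output, answers):
    #derivative of the quadratic cost 0.5*||output - answers||^2 with respect to the output
    return output - answers

print(costderivative(np.array([0.5, 0.8]), np.array([1.0, 0.0])))   # [-0.5  0.8]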

In [6]:
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (inputs, result) based on random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        for n3 in range(batchsize):
            weights, biases = train(weights, biases, minibatch, lc)
        
        
        #evaluate current accuracy over the full set of inputs
        results = []
        for n4 in range(len(inputs)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])

        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
        
    return weights, biases
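For reference, the minibatch here is built by sampling indices with replacement. Note also that train (next cell) already averages its update over the whole minibatch, so the usual pattern is a single train call per minibatch rather than the batchsize repeats of the inner loop, which roughly scales the effective step size by batchsize. A small illustration on hypothetical toy data:

import numpy as np

inputs = np.arange(20).reshape(10, 2)      # 10 toy samples, 2 features each
answers = np.arange(10)                    # toy labels
batchsize = 3
locations = np.random.randint(0, len(inputs), batchsize)   # sampled with replacement
minibatch = [(inputs[i], answers[i]) for i in locations]
print(locations)
print(len(minibatch))   # one train(weights, biases, minibatch, lc) call would follow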

In [7]:
def train(weights, biases, minibatch, lc):
    #initialise the gradient accumulators to zeros with the same shapes as the parameters
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    
    weights = [w-(lc/len(minibatch))*n_w for w, n_w in zip(weights, nw)]
    biases = [b-(lc/len(minibatch))*n_b for b, n_b in zip(biases, nb)]
    return weights, biases
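The update in train is plain averaged gradient descent: each parameter moves by lc divided by the minibatch size, times the summed gradient. A tiny numeric check with made-up numbers:

import numpy as np

w = np.array([[0.5, -0.5]])
nabla_w = np.array([[2.0, 4.0]])          # gradient summed over a minibatch of 2
lc = 0.1
print(w - (lc / 2) * nabla_w)             # [[ 0.4 -0.7]]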

In [24]:
def backprop(weights, biases, inputs, answers):
    #gradient arrays start as zeros with the same shapes as the parameters
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    a = inputs
    alist = [inputs]
    zlist = []
    #from feedforward
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        zlist.append(np.zeros(size[n]))
        alist.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            zlist[n-1][n2] = np.dot(weights[n-1][n2], alist[n-1]) + biases[n-1][n2]
            alist[n][n2] = sigmoid_v(zlist[n-1][n2])
    
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    print("delta", delta)
    print("alist", alist)
    #different from MN, alist[-2] not same size as delta?
    nw[-1] = np.dot(delta, alist[-2].transpose())
    
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1].transpose(), delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        #same here
        nw[-n] = np.dot(delta, alist[-n-1].transpose())
    
    return nb, nw

In [25]:
def costderivative(output, answers):
    return (output - answers)

In [26]:
def accuracy(inputs, results, answers):
    correct = 0
    #work on copies so the caller's results are not overwritten while comparing against np.amax
    binresults = [np.copy(r) for r in results]
    for n in range(0, len(results)):
        #converts the output into a binary y/n for each digit
        for n2 in range(len(results[n])):
            if results[n][n2] == np.amax(results[n]):
                binresults[n][n2] = 1
            else:
                binresults[n][n2] = 0

        if np.array_equal(answers[n], binresults[n]):
            correct += 1
    return correct / len(results)
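An equivalent, more direct formulation compares argmax positions instead of building binary vectors; a short sketch on made-up outputs:

import numpy as np

results = [np.array([0.1, 0.7, 0.2]), np.array([0.6, 0.3, 0.1])]
answers = [np.array([0., 1., 0.]), np.array([0., 0., 1.])]
correct = sum(int(np.argmax(r) == np.argmax(a)) for r, a in zip(results, answers))
print(correct / len(results))             # 0.5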

In [27]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n], size[n-1]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:500]
traininganswers = digits.target[0:500]

traininganswervectors = np.zeros((500,10))
for n in range(500):
    traininganswervectors[n][digits.target[n]] = 1

In [28]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
                                              traininganswervectors, 5, 1, 100)

print(final_weights)


delta [ 0.05565875  0.14136822  0.14797691  0.03152321  0.11198302  0.11895867
  0.13136808  0.11225746 -0.12030967  0.0020779 ]
alist [array([  0.,   0.,   7.,  13.,  15.,   5.,   0.,   0.,   0.,   0.,   8.,
        16.,  16.,  12.,   0.,   0.,   0.,   0.,   7.,  16.,  15.,   3.,
         0.,   0.,   0.,   0.,   6.,  16.,   5.,   0.,   0.,   0.,   0.,
         0.,   5.,  16.,   2.,   0.,   0.,   0.,   0.,   0.,   8.,  16.,
         6.,   0.,   0.,   0.,   0.,   0.,  12.,  12.,  13.,   0.,   0.,
         0.,   0.,   0.,   5.,  13.,  10.,   0.,   0.,   0.]), array([  9.97221876e-01,   1.00000000e+00,   9.40062072e-01,
         1.00000000e+00,   1.13953170e-03,   2.67238421e-15,
         2.82055515e-17,   9.98064586e-01,   4.15846773e-01,
         7.81768934e-09,   1.66491768e-03,   9.50514785e-02,
         8.34626093e-13,   1.00000000e+00,   1.00000000e+00,
         1.14756774e-06,   9.99388897e-01,   9.35530420e-16,
         9.99842734e-01,   3.42241052e-09]), array([ 0.93654325,  0.74592588,  0.65349386,  0.96623516,  0.84207515,
        0.82538401,  0.78894421,  0.45302801,  0.51812791,  0.04668686])]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-659cabab3c73> in <module>()
      1 final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
----> 2                                               traininganswervectors, 5, 1, 100)
      3 
      4 print(final_weights)

<ipython-input-6-1fc706aefe9d> in gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs)
      8             minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
      9         for n3 in range(batchsize):
---> 10             weights, biases = train(weights, biases, minibatch, lc)
     11 
     12 

<ipython-input-7-9425d8ff6f22> in train(weights, biases, minibatch, lc)
      5     #largely taken from Michael Nielsen's implementation
      6     for i, r in minibatch:
----> 7         dnb, dnw = backprop(weights, biases, i, r)
      8         nb = [a+b for a, b in zip(nb, dnb)]
      9         nw = [a+b for a, b in zip(nw, dnw)]

<ipython-input-24-8a92b8ee7a47> in backprop(weights, biases, inputs, answers)
     20     print("alist", alist)
     21     #different from MN, alist[-2] not same size as delta?
---> 22     nw[-1] = np.dot(delta, alist[-2].transpose())
     23 
     24     for n in range(2, len(size)):

ValueError: shapes (10,) and (20,) not aligned: 10 (dim 0) != 20 (dim 0)
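This ValueError pins down the question raised in the backprop comment: delta has shape (10,), one value per output neuron, while alist[-2] has shape (20,), so np.dot cannot contract them. Because both are 1-D arrays here, rather than the column vectors used in Michael Nielsen's implementation, the weight gradient is their outer product, which yields the (10, 20) shape of weights[-1]. A minimal sketch of the fix, with random stand-in values; the same change applies to nw[-n] inside the loop:

import numpy as np

delta = np.random.rand(10)                # error signal at the output layer
a_prev = np.random.rand(20)               # activations of the hidden layer
nw_last = np.outer(delta, a_prev)         # replaces np.dot(delta, alist[-2].transpose())
print(nw_last.shape)                      # (10, 20), matching weights[-1]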
