In [19]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.html.widgets import interact

from sklearn.datasets import load_digits
digits = load_digits()

In [20]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)

In [21]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02

#convert the integer answers into a 10-dimension array
traininganswervectors = np.zeros((1796,10))
for n in range(1796):
    traininganswervectors[n][digits.target[n]] = 1

In [22]:
def feedforward(weights, biases, a):
    b = []
    #first element is inputs "a"
    b.append(a)
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        b.append(np.zeros(size[n]))
        for n2 in range(0, size[n]):
            b[n][n2] = sigmoid_v(np.dot(weights[n-1][0:,n2], b[n-1]) + biases[n-1][n2])
      
    return b

In [115]:
feedforward(weights, biases, trainingdata[0])


Out[115]:
[array([  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.,   0.,   0.,  13.,
         15.,  10.,  15.,   5.,   0.,   0.,   3.,  15.,   2.,   0.,  11.,
          8.,   0.,   0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.,   0.,
          5.,   8.,   0.,   0.,   9.,   8.,   0.,   0.,   4.,  11.,   0.,
          1.,  12.,   7.,   0.,   0.,   2.,  14.,   5.,  10.,  12.,   0.,
          0.,   0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]),
 array([  9.90442338e-01,   2.44256528e-09,   1.00000000e+00,
          1.00000000e+00,   5.51988812e-07,   3.78325361e-12,
          1.00000000e+00,   9.99999971e-01,   8.37342711e-01,
          9.99607802e-01,   9.82783336e-01,   9.99997482e-01,
          9.57307764e-01,   1.00000000e+00,   1.00000000e+00,
          1.00000000e+00,   9.84974834e-01,   4.64684702e-03,
          1.00000000e+00,   8.02410452e-03]),
 array([ 0.44779612,  0.87163994,  0.92769695,  0.44619154,  0.06217504,
         0.5942893 ,  0.60903872,  0.9745915 ,  0.09787074,  0.12295571])]

In [116]:
def gradient_descent(weights, biases, inputs, answers, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (inputs, result) based on random locations
        for n2 in range(batchsize):
            minibatch.append((inputs[locations[n2]], answers[locations[n2]]))
        for n3 in range(batchsize):
            weights, biases = train(weights, biases, minibatch, lc)
        
        
        results = []
        for n4 in range(len(trainingdata)):
            results.append(feedforward(weights, biases, inputs[n4])[-1])
            
        accresult = accuracy(inputs, results, answers)
        print("Epoch ", n, " : ", accresult)
        
    return weights, biases

In [117]:
def train(weights, biases, minibatch, lc):
    #set the nabla functions to be the functions themselves initially, same size
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    #largely taken from Michael Nielsen's implementation
    for i, r in minibatch:
        dnb, dnw = backprop(weights, biases, i, r)
        nb = [a+b for a, b in zip(nb, dnb)]
        nw = [a+b for a, b in zip(nw, dnw)]
    
    weights = [w-(lc/len(minibatch))*n_w for w, n_w in zip(weights, nw)]
    biases = [b-(lc/len(minibatch))*n_b for b, n_b in zip(biases, nb)]
    return weights, biases

In [118]:
def backprop(weights, biases, inputs, answers):
    #set the nabla functions to be the same size as functions
    nb = [np.zeros(b.shape) for b in biases]
    nw = [np.zeros(w.shape) for w in weights]
    a = inputs
    alist = [inputs]
    zlist = []
    #from feedforward
    for n in range(1, len(size)):
        #all other elements depend on the number of neurons
        zlist.append(np.zeros(size[n]))
        alist.append(np.zeros(size[n]))
        for n2 in range(1, size[n]):
            zlist[n-1][n2] = np.dot(weights[n-1][0:,n2], alist[n-1]) + biases[n-1][n2]
            alist[n][n2] = sigmoid_v(alist[n-1][n2])
    
    delta = costderivative(alist[-1], answers) * sigmoidprime_v(zlist[-1])
    nb[-1] = delta
    #different from MN, alist[-2] not same size as delta?
    nw[-1] = np.dot(delta, alist[-1].transpose())
    
    for n in range(2, len(size)):
        delta = np.dot(weights[-n+1], delta) * sigmoidprime_v(zlist[-n])
        nb[-n] = delta
        #same here
        nw[-n] = np.dot(delta, alist[-n].transpose())
    
    return nb, nw

In [119]:
def costderivative(output, answers):
    return (output - answers)

In [120]:
def accuracy(inputs, results, answers):
    correct = 0
    binresults = results
    for n in range(0, len(results)):
        #converts the output into a binary y/n for each digit
        for n2 in range(len(results[n])):
            if results[n][n2] == np.amax(results[n]):
                binresults[n][n2] = 1
            else:
                binresults[n][n2] = 0
        
        if np.array_equal(answers[n], binresults[n]):
            correct += 1
    return correct / len(results)

In [124]:
size = [64, 20, 10]

weights = []
for n in range(1, len(size)):
    weights.append(np.random.rand(size[n-1], size[n]) * 2 - 1)

biases = []
for n in range(1, len(size)):
    biases.append(np.random.rand(size[n]) * 2 - 1)

trainingdata = digits.data[0:1000]
traininganswers = digits.target[0:1000]

traininganswervectors = np.zeros((1000,10))
for n in range(1000):
    traininganswervectors[n][digits.target[n]] = 1

In [125]:
final_weights, final_biases = gradient_descent(weights, biases, trainingdata,
                                              traininganswervectors, 5, 1, 30)

print(final_weights)


Epoch  0  :  0.048
Epoch  1  :  0.093
Epoch  2  :  0.095
Epoch  3  :  0.101
Epoch  4  :  0.097
Epoch  5  :  0.095
Epoch  6  :  0.101
Epoch  7  :  0.101
Epoch  8  :  0.105
Epoch  9  :  0.091
Epoch  10  :  0.09
Epoch  11  :  0.091
Epoch  12  :  0.091
Epoch  13  :  0.09
Epoch  14  :  0.092
Epoch  15  :  0.091
Epoch  16  :  0.091
Epoch  17  :  0.091
Epoch  18  :  0.09
Epoch  19  :  0.09
Epoch  20  :  0.089
Epoch  21  :  0.089
Epoch  22  :  0.081
Epoch  23  :  0.14
Epoch  24  :  0.139
Epoch  25  :  0.125
Epoch  26  :  0.117
Epoch  27  :  0.113
Epoch  28  :  0.114
Epoch  29  :  0.114
[array([[ 0.76450383,  0.59251208, -0.33663917, ...,  0.03105159,
        -0.50664191,  0.55243318],
       [-0.32381734, -0.19473504,  0.47964496, ..., -0.74232456,
         0.63391058,  0.11945287],
       [ 0.6231575 ,  0.86058574,  0.88342131, ..., -0.22242994,
         0.25655237, -0.23629923],
       ..., 
       [-0.36932122, -0.3959529 ,  0.81687002, ..., -0.97764035,
        -0.32230678,  0.12894721],
       [ 0.3298412 ,  0.6209686 ,  0.84756657, ..., -0.12449487,
         0.39450935, -0.18668442],
       [-0.95583099,  0.08569311,  0.71531   , ...,  0.87901573,
        -0.25818984, -0.67319341]]), array([[ 0.14131349, -1.45527079, -0.54529945, -1.0817927 , -1.63675429,
        -0.82509237, -0.50577114, -1.25099751, -1.57167739, -1.58229838],
       [-0.26268802, -0.46828519, -0.27403453, -0.27378482, -0.11569697,
        -1.14896415, -1.0756586 , -1.00494417, -1.1274975 , -1.18015684],
       [-0.02772253,  0.15362551, -0.73992948, -0.38885413,  0.10669895,
        -1.04529539, -0.51262275,  0.04903211,  0.07113167, -0.95034216],
       [-1.10066271, -0.4192437 , -0.83153843, -0.08715551,  0.0762273 ,
        -1.61585745, -0.07269733,  0.22141072, -1.47321102, -1.06635828],
       [-1.53605171, -0.20236728, -1.59564818, -0.7736486 , -0.85743672,
        -1.65075395,  0.22338501, -0.41452082, -1.19489058, -0.28412976],
       [-0.97130133, -1.26394292, -0.86485605,  0.21719278,  0.04154922,
        -0.56971456, -1.64255665, -1.71024067, -1.65795937, -1.58724435],
       [-1.17607687, -1.66427757, -1.26226142,  0.03540448, -0.09032715,
        -0.57956052, -1.30498373,  0.19528167, -0.61780775, -1.71499853],
       [-1.32944467, -0.58069153, -0.7444898 ,  0.13856652, -1.36328519,
        -0.13302136, -1.30499062, -0.1462735 ,  0.09600149, -1.51221257],
       [-1.2194706 , -0.23253221, -0.66908131, -0.61349542, -0.69434693,
        -0.5319174 , -0.53994301, -1.40761353, -0.77824655, -0.47336865],
       [-0.57874113, -1.45537269, -0.52541559,  0.00194201, -1.72241672,
        -0.15125062, -1.50899977, -1.31169813, -0.38594925,  0.17597085],
       [-1.15019981,  0.15592889, -0.31838663, -0.85503774, -1.15652188,
        -0.19242718, -0.88554113, -0.27003906, -0.59227189, -0.38658356],
       [-0.19594764, -0.27451577,  0.12450115, -1.16536117,  0.22683144,
        -1.45717936,  0.05982142, -0.48704248,  0.16055401,  0.07586088],
       [-1.31938199, -0.15037955, -0.54360006, -0.16815748, -0.53387818,
        -1.06238962, -0.71740935, -1.72122009, -0.43972491, -0.02027598],
       [-1.5115723 , -0.84107486, -1.14661439, -1.11010468, -0.39087975,
         0.10574395, -1.70313065, -1.37999991, -1.2981514 , -0.4824089 ],
       [ 0.17303988, -0.90082792, -0.36351988, -1.04551961, -0.80590839,
         0.18561133,  0.1314899 , -0.74618971, -0.60030412, -1.45901202],
       [ 0.25517938, -1.462488  , -1.29089867, -0.52794909,  0.10719908,
        -0.02585236, -0.37550124, -0.59261764, -1.13334063, -0.21033767],
       [-0.43664434, -1.39479159, -1.69268381, -0.94948049, -1.27133524,
        -0.70130297, -1.33301927, -0.81116035, -1.07527586, -1.66143407],
       [-0.45148065, -0.91159968, -1.03189045, -1.10330699, -0.22954374,
        -1.23667512, -1.15779083, -1.13007106,  0.11576962, -1.59387454],
       [-1.2342925 ,  0.22825142, -0.52002247, -0.94077492, -1.31173937,
        -0.04128284, -0.60117536, -0.52280868, -0.14590897, -0.16754535],
       [-1.38041727,  0.0245013 , -0.88616725, -1.41358959, -0.71826232,
        -0.9898886 , -1.22985433, -0.52484158, -1.2708501 , -1.64708579]])]

In [ ]:


In [ ]: