In [44]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.html.widgets import interact

from sklearn.datasets import load_digits
digits = load_digits()

In [138]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

sigmoid_v = np.vectorize(sigmoid)

def sigmoidprime(x):
    return sigmoid(x) * (1 - sigmoid(x))

sigmoidprime_v = np.vectorize(sigmoidprime)

In [140]:
#64 inputs, 12 hidden, 10 out

weights = [np.random.rand(64, 20) * 2 - 1, np.random.rand(20, 10) * 2 - 1]
biases = [np.random.rand(20) * 2 - 1, np.random.rand(10) * 2 - 1]
trainingdata = digits.data[0:1200]
traininganswers = digits.target[0:1200]
lc = 0.02

traininganswervectors = np.zeros((1796,10))
for n in range(1796):
    traininganswervectors[n][digits.target[n]] = 1

In [141]:
def feedforward(a, weights, biases):
    #for each hidden, calculate the value based on the inputs
    b = np.zeros(20)
    for n in range(20):
        b[n] = sigmoid_v(np.dot(weights[0][0:,n], a)) + biases[0][n]
    
    #for each output, calculate based on hidden
    c = np.zeros(10)
    for n2 in range(10):
        c[n2] = sigmoid_v(np.dot(weights[1][0:,n2], b)) + biases[1][n2]
        
    return c

In [142]:
print(inputs)
feedforward(inputs, weights, biases)


[  0.   0.   5.  13.   9.   1.   0.   0.   0.   0.  13.  15.  10.  15.   5.
   0.   0.   3.  15.   2.   0.  11.   8.   0.   0.   4.  12.   0.   0.   8.
   8.   0.   0.   5.   8.   0.   0.   9.   8.   0.   0.   4.  11.   0.   1.
  12.   7.   0.   0.   2.  14.   5.  10.  12.   0.   0.   0.   0.   6.  13.
  10.   0.   0.   0.]
Out[142]:
array([ 1.43704666,  0.01691983,  0.09181134,  0.98763505,  1.4865199 ,
        0.71999403,  1.19331762,  0.43169264,  0.81842977,  1.12723021])

In [143]:
def GradientDescent(inputs, results, batchsize, lc, epochs):
    for n in range(epochs):
        #pick random locations for input/result data
        locations = np.random.randint(0, len(inputs), batchsize)
        minibatch = []
        #create tuples (inputs, result) based on random locations
        for n2 in batchsize:
            minibatch.append((inputs[locations[n2]], results[locations[n2]]))
        for n3 in range(batchsize):
            train(minibatch[n3], lc)

In [144]:
def train(minibatch, lc):
    nw = weights
    nb = bias
    dnb, dnw = backprop(inputs, results)
    for n in range(len(minibatch)):
        nb = nb+dnb
        nw = nw+dnw
        
    weights = w-(lc/len(minibatch))*nw
    biases = b-(lc/len(minibatch))*nb

In [147]:
def backprop(b, inputs, results):
    nw = weights
    nb = bias
    zlist = []
    alist = []
    for n in range(10):
        zlist[1][n] = np.dot(weights[1][0:,n], b) + biases[1][n]
        alist[1][n] = sigmoid_v(zlist[1][n])
    for n2 in range(20):
        zlist[0][n2] = np.dot(weights[0][0:,n2], b) + biases[0][n2]
        alist = sigmoid_v(zlist[0][n])
        
    delta = costderivative(alist[1], results) * sigmoidprime_v(zlist[1])
    
    nb[1] = delta
    nw[1] = np.dot(delta, alist[1].transpose())
    
    sigprime = sigmoidprime_v(zlist[0])
    delta = np.dot(weights[0], delta) * sigprime
    nb[0] = delta
    nw[0] = np.dot(delta, alist[0].transpose())
    
    return nb, nw

In [ ]:
def costderivative(output, answers):
    return (output - answers)

In [52]:
def accuracy(inputs, results, answers):
    correct = 0
    for n in range(0, len(results)):
        if results[n] == answers[n]:
            correct += 1
    return correct / len(results)

In [ ]:
for n in len(trainingdata):
    ffwd = feedforward(trainingdata[n], weights, biases)
    GradientDescent(ffwd, traininganswervectors[n], 5, 0.05, 10)