Neural Networks

Sara Jones

Abstract

This project takes handwritten digits 0 through 9 and recognizes them with a computer-learning program. The neural network requires a training set to 'teach' it how to recognize the individual features that distinguish the different digits and return the proper identification. The network must also handle differences between styles of handwriting (such as bars or no bars in sevens) and account for other factors such as messy handwriting. These factors are handled by assigning weights to the characteristics of each digit (accounting for various stylistic differences), so the network can determine which factors are important for identifying a digit and which can be given less weight or even ignored.

Base question

The base question for this project is taking handwritten numbers and recognizing them through a neural network. This requires a computerized learning system that must be trained to recognize the digits. The network should achieve over 90% accuracy when recognizing handwritten digits.


In [1]:
from IPython.display import display
from IPython.display import Image

In [2]:
import numpy as np
import math
import random
from scipy import optimize
from scipy.interpolate import griddata

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.html.widgets import interact


:0: FutureWarning: IPython widgets are experimental and may change in the future.

In [4]:
from sklearn.datasets import load_digits
digits = load_digits()
print(digits.data.shape)


(1797, 64)

In [5]:
a = digits.target

In [6]:
def show_digit(i):
    plt.matshow(digits.images[i], cmap='gray');

In [7]:
show_digit(0)



In [8]:
show_digit(1)



In [9]:
show_digit(2)



In [10]:
show_digit(3)



In [11]:
show_digit(4)



In [12]:
show_digit(5)



In [13]:
show_digit(6)



In [14]:
show_digit(7)



In [15]:
show_digit(8)



In [16]:
show_digit(9)



In [17]:
interact(show_digit, i=(0,100));



In [18]:
digits.target


Out[18]:
array([0, 1, 2, ..., 8, 9, 8])

In [19]:
"""deffintion of layers weights and biases for the network"""


def initialize_weights(layers):
    nlayers = len(layers)
    weights = []
    for i in range(nlayers-1):
        w = np.random.randn(layers[i+1],layers[i])
        weights.append(w)
    return weights

def initialize_biases(layers):
    nlayers = len(layers)
    biases = []
    for i in range(nlayers-1):
        b = np.random.randn(layers[i+1])
        biases.append(b)
    return biases

def initialize(layers):
    """Initialize both the biases and the weights for every layer at once."""
    nlayers = len(layers)
    biases = []
    weights = []
    for i in range(nlayers-1):
        b = np.random.randn(layers[i+1])
        biases.append(b)
        w = np.random.randn(layers[i+1],layers[i])
        weights.append(w)
    return biases, weights

In [20]:
initialize_weights([4,3,5])


Out[20]:
[array([[-2.46300409,  1.06667564, -0.06314531,  0.0196614 ],
        [ 0.60701328, -0.65577449,  0.20831386, -0.75966499],
        [ 0.40467794, -0.12545396,  1.52203111, -0.1519065 ]]),
 array([[-2.10665881,  0.86922705,  0.23413697],
        [-0.23998871, -0.06256537,  0.09722866],
        [ 0.92264316,  0.90868384,  0.15553074],
        [-0.37892518, -0.18286887,  1.93648099],
        [ 0.42631645,  0.55161602,  0.59971509]])]

In [21]:
initialize_biases([4,3,5])


Out[21]:
[array([-0.91125265,  1.05912391,  0.62763642]),
 array([-3.20344516,  0.04504446,  0.57331792, -0.91206437,  0.87429496])]

In [22]:
w = initialize_weights([4,3,5])
assert w[0].shape == (3,4)
assert w[1].shape == (5,3)

In [23]:
b = initialize_biases([4,3,5])
assert b[0].shape == (3,)
assert b[1].shape == (5,)

In [24]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

In [25]:
assert sigmoid(0) == 0.5 
assert sigmoid(1000) == 1.0

In [26]:
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

In [27]:
assert sigmoid_prime(0) == 0.25
assert sigmoid_prime(1000) == 0.0

In [28]:
"""Return the output if k is the input""" 
def feedforward_1(weights, biases, k):
    for w, b in zip(weights, biases):
        k = sigmoid(np.dot(w, k)+b)
    return k
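As a quick sanity check (a sketch added here, not part of the original notebook), the forward pass can be run on a freshly initialized network to confirm that the output has one entry per neuron in the final layer:

# Sanity-check sketch: push a random 64-pixel input through a 64-30-10
# network built with the initializers above and check the output shape.
layers = [64, 30, 10]
weights = initialize_weights(layers)
biases = initialize_biases(layers)
sample_input = np.random.randn(64)
output = feedforward_1(weights, biases, sample_input)
assert output.shape == (10,)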

In [29]:
def backprop_1(weights, biases, x, y):
    """Return (nabla_b, nabla_w), the gradients of the cost with respect to
    the biases and weights, computed by backpropagation for one training
    example (x, y)."""
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]
    nlayers = len(weights) + 1
    # forward pass, storing weighted inputs (zs) and activations layer by layer
    activation = x
    activations = [x]
    zs = []
    for b, w in zip(biases, weights):
        z = np.dot(w, activation)+b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    # backward pass
    delta = (activations[-1] - y) * sigmoid_prime(zs[-1])   # quadratic-cost derivative
    nabla_b[-1] = delta
    nabla_w[-1] = np.outer(delta, activations[-2])
    for l in range(2, nlayers):
        z = zs[-l]
        spv = sigmoid_prime(z)
        delta = np.dot(weights[-l+1].transpose(), delta) * spv
        nabla_b[-l] = delta
        nabla_w[-l] = np.outer(delta, activations[-l-1])
    return (nabla_b, nabla_w)
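A quick shape check (another added sketch, assuming the 1-D initializers above) confirms that the gradients returned by backprop_1 match the shapes of the parameters they will update:

# Sketch: the gradients from backprop_1 should have the same shapes as
# the weights and biases they correspond to.
layers = [64, 30, 10]
weights = initialize_weights(layers)
biases = initialize_biases(layers)
x = np.random.randn(64)
y = np.zeros(10); y[3] = 1.0      # one-hot target for the digit 3
nabla_b, nabla_w = backprop_1(weights, biases, x, y)
assert all(nb.shape == b.shape for nb, b in zip(nabla_b, biases))
assert all(nw.shape == w.shape for nw, w in zip(nabla_w, weights))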

In [30]:
Image(url ='https://www.soils.org/images/publications/sssaj/70/6/1851fig1.jpeg', embed = True, width = 700, height = 600)


Out[30]:

In [31]:
def SGD(training_data, epochs, batch_size, test_data=None):
    """Stochastic gradient descent: shuffle the training data each epoch,
    split it into mini-batches, and run the mini-batch update on each one."""
    if test_data: n_test = len(test_data)
    n = len(training_data)
    for j in range(epochs):
        random.shuffle(training_data)
        mini_batches = [training_data[k:k+batch_size]
                        for k in range(0, n, batch_size)]
        for mini_batch in mini_batches:
            # batch() (next cell) returns the updated parameters; this draft
            # version does not store them itself -- the Network class below does.
            new_weights, new_biases = batch(mini_batch, 1.0)
        if test_data:
            print("Epoch {0}: {1} / {2}".format(j, test(test_data), n_test))
        else:
            print("Epoch {0} complete".format(j))
    return mini_batches
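These standalone helpers expect training_data as a list of (input, target) pairs. As an added sketch (not in the original notebook), the scikit-learn digits can be packaged that way, with pixel values scaled to [0, 1], one-hot targets for training, and plain integer labels for the evaluation helper defined below:

# Sketch: package the digits data in the (input, target) format that the
# standalone SGD/batch/test helpers expect.
def one_hot(label):
    t = np.zeros(10)
    t[label] = 1.0
    return t

training_pairs = [(digits.data[i] / 16.0, one_hot(digits.target[i]))
                  for i in range(1500)]
test_pairs = [(digits.data[i] / 16.0, digits.target[i])
              for i in range(1500, len(digits.data))]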

In [32]:
"""Up date biases and weights by using backproagation to
each mini_batch from above"""
def batch(mini_batch, rat):
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]
    for x, y in mini_batch:
        delta_nabla_b, delta_nabla_w = self.backprop(x, y)
        nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
        nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
    new_weights = [w-(rat/len(mini_batch))*nw for w, nw in zip(weights, nabla_w)]
    new_biases = [b-(rat/len(mini_batch))*nb for b, nb in zip(biases, nabla_b)]
    return new_weights, new_biases

In [33]:
def cost_derivative(output_activations, y):
    return (output_activations-y)
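(For reference: this is the gradient of the quadratic cost C = ½‖output − y‖² with respect to the output activations, which is why it reduces to simply output − y.)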

In [34]:
def test(test_data):
    """Count how many examples in test_data the network labels correctly,
    using the global weights and biases."""
    test_results = [(np.argmax(feedforward_1(weights, biases, x)), y)
                    for (x, y) in test_data]
    return sum(int(x == y) for (x, y) in test_results)

In [35]:
"""Trainnign the network on a portion of the data"""

def train_net(net, nput, target, cycles = 30, dig_cycle = 1400, batch_size = 15, learn_rate = 1):
    train_index = np.linspace(0, dig_cycle, dig_cycle + 1)
    targ_list = [target(n) for n in target[0:dig_cycle]]
    np.random.seed(1)
    np.random.shuffle(train_index)
    for j in range(cycles):
        for n in train_index:
            if n+batch_size <= dig_cycle:
                train_dat = nput[int(n):int(n+cycles)]
                target_data = target_list[int(n):int(n+cycles)]
            else:    
                train_dat = nput[int(n-cycles):dig_cycle]
                assert len(train_dat)!=0
                target_data = target_list[int(n-cycles):dig_cycle]
            stoc_dec(net, train_dat, target_data, learn_rate)

In [36]:
def learn(network, X, y, learning_rate=0.2, epochs=10000):
    """Online backpropagation: pick one random example per iteration and
    nudge the (global) weights toward its target, using the sigmoid as
    the activation function."""
    activation = sigmoid
    activation_deriv = lambda out: out * (1 - out)   # sigmoid derivative in terms of its output
    X = np.atleast_2d(X)
    temp = np.ones([X.shape[0], X.shape[1]+1])
    temp[:, 0:-1] = X                    # append a constant bias column
    X = temp
    y = np.array(y)

    for i in range(epochs):
        k = np.random.randint(X.shape[0])    # pick one training example at random
        a = [X[k]]

        # forward pass
        for j in range(len(weights)):
            a.append(activation(np.dot(a[j], weights[j])))
        error = y[k] - a[-1]
        deltas = [error * activation_deriv(a[-1])]

        # backward pass
        for j in range(len(a) - 2, 0, -1):
            deltas.append(deltas[-1].dot(weights[j].T)*activation_deriv(a[j]))
        deltas.reverse()

        # gradient step on every layer's weights
        for j in range(len(weights)):
            layer = np.atleast_2d(a[j])
            delta = np.atleast_2d(deltas[j])
            weights[j] += learning_rate * layer.T.dot(delta)

    return weights

In [37]:
"""This is the network, it is called Fluffles!"""
"""This takes all the functions from above and puts them together to train and analyze the data."""


class Network():

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) 
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid_vec(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):

        if test_data: n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))


    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw 
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb 
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activations = [x] 
        zs = [] 
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid_vec(z)
            activations.append(activation)
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime_vec(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # propagate the error back through the remaining layers
        for l in range(2, self.num_layers):
            z = zs[-l]
            spv = sigmoid_prime_vec(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * spv
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y) 
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
        
    def cost_derivative(self, output_activations, y):
        return (output_activations-y) 

def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

sigmoid_vec = np.vectorize(sigmoid)

def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

sigmoid_prime_vec = np.vectorize(sigmoid_prime)

In [38]:
Network([5,37,12])
Network


Out[38]:
__main__.Network
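To put Fluffles to work on the digits data, the 8x8 images need to be reshaped into (64, 1) column vectors (the class stores its biases as (n, 1) arrays) and the training labels one-hot encoded. The cell below is an added sketch of such a run; the 1500-example training split, the 30-neuron hidden layer, and the learning rate of 3.0 are assumptions, not values from the original notebook.

# Sketch: train a 64-30-10 Fluffles network on the scikit-learn digits.
# Inputs are scaled to [0, 1] and reshaped into column vectors; training
# targets are one-hot (10, 1) columns, test targets stay as integer labels.
def to_column(x):
    return (x / 16.0).reshape(-1, 1)

def to_target(label):
    t = np.zeros((10, 1))
    t[label] = 1.0
    return t

training_data = [(to_column(digits.data[i]), to_target(digits.target[i]))
                 for i in range(1500)]
test_data = [(to_column(digits.data[i]), digits.target[i])
             for i in range(1500, len(digits.data))]

net = Network([64, 30, 10])
net.SGD(training_data, epochs=10, mini_batch_size=15, eta=3.0)
print(net.evaluate(test_data), "/", len(test_data), "correct")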

Sigmoid Functions

These functions introduce a non-linear factor into the neural network, allowing it to combine and use non-linear terms. Without the sigmoid, the network could only process and learn linear systems or combinations of linear systems; a stack of purely linear layers collapses into a single linear map, so the non-linearity between layers is what lets the network represent more complicated decision boundaries.
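The three activations pictured below (sigmoid, tanh, and the unit step) can also be plotted directly for comparison; this small cell is an addition, not part of the original notebook:

# Sketch: plot the sigmoid, tanh, and unit-step activations side by side.
z = np.linspace(-6, 6, 200)
plt.plot(z, sigmoid(z), label='sigmoid')
plt.plot(z, np.tanh(z), label='tanh')
plt.plot(z, np.where(z >= 0, 1.0, 0.0), label='unit step')
plt.legend()
plt.xlabel('z')
plt.ylabel('activation');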


In [39]:
Image(url ='http://www.saedsayad.com/images/ANN_Sigmoid.png', embed = True, width = 500, height = 400)


Out[39]:

In [40]:
Image(url ='http://www.dplot.com/functions/tanh.png', embed = True, width = 500, height = 400)


Out[40]:

In [41]:
Image(url ='http://www.saedsayad.com/images/ANN_Unit_step.png', embed = True, width = 500, height = 400)


Out[41]:

Neural Network libraries


In [42]:
from sknn.mlp import Classifier, Layer

def example_1(X_train, y_train, X_test, y_test):
    nn = Classifier(
        layers=[
            Layer("Maxout", units=100, pieces=2),
            Layer("Softmax")],
        learning_rate=0.001,
        n_iter=25)
    nn.fit(X_train, y_train)

    y_example = nn.predict(X_train)
    y_valid = nn.predict(X_test)

    score = nn.score(X_test, y_test)
    return score

In [43]:
from sknn.mlp import Classifier, Convolution, Layer

def example_2(X_train, y_train):
    nn = Classifier(
        layers=[
            Convolution("Rectifier", channels=8, kernel_shape=(3,3)),
            Layer("Softmax")],
        learning_rate=0.02,
        n_iter=5)
    nn.fit(X_train, y_train)
    return nn

In [44]:
from sknn.mlp import Classifier, Layer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

def example_3(X, y):
    pipeline = Pipeline([
        ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
        ('neural network', Classifier(layers=[Layer("Softmax")], n_iter=25))])
    pipeline.fit(X, y)
    return pipeline
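As a usage sketch (again an addition, and assuming the sknn package is installed alongside scikit-learn), the digits data can be split and the simple softmax pipeline scored on held-out examples; train_test_split comes from sklearn.model_selection in recent scikit-learn versions:

# Sketch: split the digits data and score the softmax pipeline on the
# held-out portion.  Requires scikit-neuralnetwork (sknn) to be installed.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0)

pipeline = example_3(X_train, y_train)
print(pipeline.score(X_test, y_test))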

I do not own nor did I create the images; all are courtesy of Google.