This project takes handwritten digits 0 through 9 and recognizes them with a machine-learning program. The neural network requires a training set to 'teach' it how to recognize the individual differences between the digits and return the proper identification. The network must distinguish between different styles of handwriting (such as sevens written with or without a crossbar) and account for other factors such as messy handwriting. These factors are handled by assigning weights to the characteristics of each digit (accounting for various stylistic differences) so the network can determine which features are important for identifying a digit and which can be given less weight or ignored entirely.
The base question for this project is whether handwritten numbers can be recognized through a neural network. This requires a computerized learning system that must be trained to recognize the digits. The goal is for the network to reach over 90% accuracy when classifying handwritten digits.
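As a rough illustration of the weighting idea described above (a minimal sketch, not part of the network built below), a single artificial neuron scores an image by taking a weighted sum of its pixel values plus a bias and squashing the result through an activation function; the pixel values and weights here are made-up placeholders.
import numpy as np

def neuron_output(pixels, weights, bias):
    # Weighted sum of pixel intensities plus a bias, squashed into (0, 1).
    z = np.dot(weights, pixels) + bias
    return 1.0 / (1.0 + np.exp(-z))

# Hypothetical 8x8 image flattened to 64 values, with placeholder weights.
rng = np.random.default_rng(0)
example_pixels = rng.random(64)            # stand-in for one digit image
example_weights = rng.standard_normal(64)  # values of this kind are learned during training
print(neuron_output(example_pixels, example_weights, bias=0.0))  # a score between 0 and 1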
In [1]:
from IPython.display import display
from IPython.display import Image
In [2]:
import numpy as np
import math
import random
from scipy import optimize
from scipy.interpolate import griddata
In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.html.widgets import interact
In [4]:
from sklearn.datasets import load_digits
digits = load_digits()
print(digits.data.shape)
In [5]:
a = digits.target
In [6]:
def show_digit(i):
    plt.matshow(digits.images[i], cmap='gray');
In [7]:
show_digit(0)
In [8]:
show_digit(1)
In [9]:
show_digit(2)
In [10]:
show_digit(3)
In [11]:
show_digit(4)
In [12]:
show_digit(5)
In [13]:
show_digit(6)
In [14]:
show_digit(7)
In [15]:
show_digit(8)
In [16]:
show_digit(9)
In [45]:
interact(show_digit, i=(0,100));
In [18]:
digits.target
Out[18]:
In [19]:
"""Definition of the layers, weights, and biases for the network."""
def initialize_weights(layers):
    nlayers = len(layers)
    weights = []
    for i in range(nlayers-1):
        w = np.random.randn(layers[i+1], layers[i])
        weights.append(w)
    return weights
def initialize_biases(layers):
    nlayers = len(layers)
    biases = []
    for i in range(nlayers-1):
        b = np.random.randn(layers[i+1])
        biases.append(b)
    return biases
def initialize(layers):
    nlayers = len(layers)
    biases = []
    weights = []
    for i in range(nlayers-1):
        b = np.random.randn(layers[i+1])
        biases.append(b)
        w = np.random.randn(layers[i+1], layers[i])
        weights.append(w)
    return biases, weights
In [20]:
initialize_weights([4,3,5])
Out[20]:
In [21]:
initialize_biases([4,3,5])
Out[21]:
In [22]:
w = initialize_weights([4,3,5])
assert w[0].shape == (3,4)
assert w[1].shape == (5,3)
In [23]:
b = initialize_biases([4,3,5])
assert b[0].shape == (3,)
assert b[1].shape == (5,)
In [24]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))
In [25]:
assert sigmoid(0) == 0.5
assert sigmoid(1000) == 1.0
In [26]:
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))
In [27]:
assert sigmoid_prime(0) == 0.25
assert sigmoid_prime(1000) == 0.0
In [28]:
def feedforward_1(weights, biases, k):
    """Return the output of the network if k is the input."""
    for w, b in zip(weights, biases):
        k = sigmoid(np.dot(w, k)+b)
    return k
In [29]:
def backprop_1(weights, biases, x, y):
    """Return (nabla_b, nabla_w), the gradients of the cost with respect to
    the biases and weights, for a single training example (x, y).
    cost_derivative is defined a few cells below."""
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]
    # forward pass
    activation = x
    activations = [x]
    zs = []
    for b, w in zip(biases, weights):
        z = np.dot(w, activation)+b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    # backward pass
    delta = cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, len(biases)+1):
        z = zs[-l]
        spv = sigmoid_prime(z)
        delta = np.dot(weights[-l+1].transpose(), delta) * spv
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
    return (nabla_b, nabla_w)
In [30]:
Image(url ='https://www.soils.org/images/publications/sssaj/70/6/1851fig1.jpeg', embed = True, width = 700, height = 600)
Out[30]:
In [31]:
def SGD(training_data, epochs, batch_size, test_data=None):
    """Stochastic gradient descent over shuffled mini-batches.
    Assumes the global weights and biases created by initialize()."""
    global weights, biases
    if test_data: n_test = len(test_data)
    n = len(training_data)
    for j in range(epochs):
        random.shuffle(training_data)
        mini_batches = [training_data[k:k+batch_size]
                        for k in range(0, n, batch_size)]
        for mini_batch in mini_batches:
            # batch() (next cell) returns the updated parameters for one mini-batch
            weights, biases = batch(mini_batch, 1)
        if test_data:
            print("Epoch {0}: {1} / {2}".format(j, test(test_data), n_test))
        else:
            print("Epoch {0} complete".format(j))
    return mini_batches
In [32]:
def batch(mini_batch, rat):
    """Update the biases and weights by applying backpropagation to
    each mini_batch from above."""
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]
    for x, y in mini_batch:
        delta_nabla_b, delta_nabla_w = backprop_1(weights, biases, x, y)
        nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
        nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
    new_weights = [w-(rat/len(mini_batch))*nw for w, nw in zip(weights, nabla_w)]
    new_biases = [b-(rat/len(mini_batch))*nb for b, nb in zip(biases, nabla_b)]
    return new_weights, new_biases
In [33]:
def cost_derivative(output_activations, y):
    return (output_activations-y)
In [34]:
def test(test_data):
    """Count how many test examples the network classifies correctly."""
    test_results = [(np.argmax(feedforward_1(weights, biases, x)), y)
                    for (x, y) in test_data]
    return sum(int(x == y) for (x, y) in test_results)
In [35]:
def train_net(net, nput, target, cycles=30, dig_cycle=1400, batch_size=15, learn_rate=1):
    """Train the network on a portion of the data."""
    train_index = np.linspace(0, dig_cycle, dig_cycle + 1)
    target_list = list(target[0:dig_cycle])
    np.random.seed(1)
    np.random.shuffle(train_index)
    for j in range(cycles):
        for n in train_index:
            if n + batch_size <= dig_cycle:
                train_dat = nput[int(n):int(n + batch_size)]
                target_data = target_list[int(n):int(n + batch_size)]
            else:
                train_dat = nput[int(n - batch_size):dig_cycle]
                assert len(train_dat) != 0
                target_data = target_list[int(n - batch_size):dig_cycle]
            # stoc_dec: one stochastic-descent step on this batch (not defined in this notebook)
            stoc_dec(net, train_dat, target_data, learn_rate)
In [36]:
def learn(network, X, y, learning_rate=0.2, epochs=10000):
    """Online backpropagation: pick a random example each epoch and
    update the global weights list in place."""
    X = np.atleast_2d(X)
    temp = np.ones([X.shape[0], X.shape[1]+1])
    temp[:, 0:-1] = X          # append a bias column of ones
    X = temp
    y = np.array(y)
    for n in range(epochs):
        k = np.random.randint(X.shape[0])   # pick a random training example
        a = [X[k]]
        # forward pass
        for j in range(len(weights)):
            a.append(sigmoid(np.dot(a[j], weights[j])))
        # backward pass: the sigmoid's derivative in terms of its output is a*(1-a)
        error = y[k] - a[-1]
        deltas = [error * a[-1] * (1 - a[-1])]
        for i in range(len(a) - 2, 0, -1):
            deltas.append(deltas[-1].dot(weights[i].T) * a[i] * (1 - a[i]))
        deltas.reverse()
        for i in range(len(weights)):
            layer = np.atleast_2d(a[i])
            delta = np.atleast_2d(deltas[i])
            weights[i] += learning_rate * layer.T.dot(delta)
    return weights
In [47]:
"""This is the network; it is called Markus!
This takes all the functions from above and puts them together to train and analyze the data."""
class Network():
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid_vec(np.dot(w, a)+b)
        return a

    def SGD(self, train_data, epochs, mini_batch_size, eta, test_data=None):
        if test_data: n_test = len(test_data)
        n = len(train_data)
        for j in range(epochs):
            random.shuffle(train_data)
            mini_batches = [
                train_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))

    def update_mini_batch(self, mini_batch, eta):
        # Accumulate the backprop gradients over the mini-batch,
        # then take one gradient-descent step.
        new_b = [np.zeros(b.shape) for b in self.biases]
        new_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_b, delta_w = self.backprop(x, y)
            new_b = [nb+dnb for nb, dnb in zip(new_b, delta_b)]
            new_w = [nw+dnw for nw, dnw in zip(new_w, delta_w)]
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, new_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, new_b)]

    def backprop(self, t, q):
        new_b = [np.zeros(b.shape) for b in self.biases]
        new_w = [np.zeros(w.shape) for w in self.weights]
        act = t
        acts = [t]
        p = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act)+b
            p.append(z)
            act = sigmoid_vec(z)
            acts.append(act)
        delta = self.cost_derivative(acts[-1], q) * sigmoid_prime_vec(p[-1])
        new_b[-1] = delta
        new_w[-1] = np.dot(delta, acts[-2].transpose())
        for j in range(2, self.num_layers):
            z = p[-j]
            spv = sigmoid_prime_vec(z)
            delta = np.dot(self.weights[-j+1].transpose(), delta) * spv
            new_b[-j] = delta
            new_w[-j] = np.dot(delta, acts[-j-1].transpose())
        return (new_b, new_w)

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_acts, y):
        return (output_acts-y)

#### Miscellaneous functions
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

sigmoid_vec = np.vectorize(sigmoid)

def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

sigmoid_prime_vec = np.vectorize(sigmoid_prime)
In [48]:
Network([5,37,12])
Network
Out[48]:
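A hedged usage sketch (not executed in the original notebook) of how the sklearn digits data could be fed to this Network: each 8x8 image is flattened to a 64x1 column and each label becomes a 10x1 one-hot vector, after which SGD trains the network and evaluate counts correct classifications. The [64, 30, 10] layer sizes, learning rate, and 1500-image split are assumptions chosen for illustration.
# Sketch only: layer sizes, eta, epochs, and the train/test split are assumptions.
def one_hot(label):
    v = np.zeros((10, 1))
    v[label] = 1.0
    return v

inputs = [x.reshape(64, 1) / 16.0 for x in digits.data]   # scale 0-16 pixel values to [0, 1]
training_data = [(x, one_hot(y)) for x, y in zip(inputs[:1500], digits.target[:1500])]
testing_data = [(x, y) for x, y in zip(inputs[1500:], digits.target[1500:])]

net = Network([64, 30, 10])
net.SGD(training_data, epochs=10, mini_batch_size=10, eta=3.0, test_data=testing_data)
print(net.evaluate(testing_data), "/", len(testing_data))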
In [49]:
Image(url ='http://www.saedsayad.com/images/ANN_Sigmoid.png', embed = True, width = 500, height = 400)
Out[49]:
In [50]:
Image(url ='http://www.dplot.com/functions/tanh.png', embed = True, width = 500, height = 400)
Out[50]:
In [51]:
Image(url ='http://www.saedsayad.com/images/ANN_Unit_step.png', embed = True, width = 500, height = 400)
Out[51]:
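The three plots above are embedded from external sites; an equivalent comparison could be drawn locally with the libraries already imported, roughly as follows (the ranges and styling are my own choices).
# Sketch: plot the sigmoid, tanh, and unit-step activation functions side by side.
z = np.linspace(-6, 6, 200)
plt.plot(z, 1.0 / (1.0 + np.exp(-z)), label='sigmoid')
plt.plot(z, np.tanh(z), label='tanh')
plt.plot(z, np.where(z >= 0, 1.0, 0.0), label='unit step')
plt.legend()
plt.title('Common activation functions');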
In [52]:
from sknn.mlp import Classifier, Layer
def example_1(w):
    # sknn example; assumes X_train, y_train, X_test, y_test are defined elsewhere.
    nn = Classifier(
        layers=[
            Layer("Maxout", units=100, pieces=2),
            Layer("Softmax")],
        learning_rate=0.001,
        n_iter=25)
    nn.fit(X_train, y_train)
    y_example = nn.predict(X_train)
    y_valid = nn.predict(X_test)
    score = nn.score(X_test, y_test)
    return score
In [53]:
from sknn.mlp import Classifier, Convolution, Layer
def example_2(x):
    nn = Classifier(
        layers=[
            Convolution("Rectifier", channels=8, kernel_shape=(3,3)),
            Layer("Softmax")],
        learning_rate=0.02,
        n_iter=5)
    nn.fit(X_train, y_train)
In [54]:
from sknn.mlp import Classifier, Layer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
def example_3(x):
    pipeline = Pipeline([
        ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
        ('neural network', Classifier(layers=[Layer("Softmax")], n_iter=25))])
    pipeline.fit(digits.data, digits.target)
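As a hedged follow-up (not in the original notebook), the same pipeline could be checked against the 90% accuracy goal stated at the top by holding out part of the digits data; the split fraction and the use of train_test_split and pipeline.score here are my assumptions about a reasonable evaluation, not reported results.
# Sketch: fit the pipeline on a training split and score it on held-out digits.
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(digits.data, digits.target,
                                          test_size=0.25, random_state=0)
pipeline = Pipeline([
    ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
    ('neural network', Classifier(layers=[Layer("Softmax")], n_iter=25))])
pipeline.fit(X_tr, y_tr)
print(pipeline.score(X_te, y_te))   # fraction of test digits classified correctly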
I do not own and did not create any of the images above; all are courtesy of a Google image search.