In [1]:
# install
#Pkg.add("MNIST");
using MNIST
In [2]:
# training data
X,y = traindata();
m = size(X, 2); # number of training examples
In [3]:
inputLayerSize = size(X,1); # 784 pixels per image
hiddenLayerSize = 25;
outputLayerSize = 10;       # one class per digit 0-9
In [4]:
# one-hot encode each label: Y[:,i] has length outputLayerSize,
# with a 1 at the row of digit y[i] (digit 0 maps to row 1)
Y = zeros(outputLayerSize, m);
for i = 1:m
    Y[y[i]+1, i] = 1;
end
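As a quick sanity check on the encoding (an illustrative sketch, not part of the original notebook, written in the same pre-0.7 Julia style), every column of Y should contain exactly one 1, placed at row y[i]+1:

# each column of Y is a one-hot vector: it sums to 1 and the 1 sits at row y[i]+1
@assert all(sum(Y, 1) .== 1)
@assert Y[y[1]+1, 1] == 1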
In [5]:
function sigmoid(z)
    g = 1.0 ./ (1.0 + exp(-z));
    return g;
end
Out[5]:
In [6]:
function sigmoidGradient(z)
    return sigmoid(z).*(1 - sigmoid(z));
end
Out[6]:
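A quick numerical check of the two helpers (illustrative only): the sigmoid is 0.5 at zero, and its gradient sigma(z)*(1 - sigma(z)) peaks there at 0.25:

println(sigmoid(0.0));          # 0.5
println(sigmoidGradient(0.0));  # 0.25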
In [7]:
# weight regularization parameter
lambda = 2;
function costFunction(truth, prediction)
    # cross-entropy cost of each training example
    cost = zeros(m, 1);
    for i = 1:m
        cost[i,:] = (-truth[:,i]'*log(prediction[:,i])) - ((1 - truth[:,i]')*log(1 - prediction[:,i]));
    end
    # regularization term over the non-bias weights (uses the global Theta1 and Theta2)
    regularization = (lambda/(2*m))*(sum(Theta1[2:end,:].^2) + sum(Theta2[2:end,:].^2));
    return (1/m)*sum(cost) + regularization; # regularized cost
end
Out[7]:
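The loop above computes the standard cross-entropy, -y*log(h) - (1-y)*log(1-h), summed over the 10 output units of each example, plus an L2 penalty on the non-bias weights. A vectorized equivalent, sketched here only for illustration (it relies on the same globals m, lambda, Theta1 and Theta2):

function costFunctionVectorized(truth, prediction)
    # elementwise cross-entropy over all output units and examples
    cost = -sum(truth.*log(prediction) + (1 - truth).*log(1 - prediction));
    # L2 penalty, skipping the bias row of each weight matrix
    regularization = (lambda/(2*m))*(sum(Theta1[2:end,:].^2) + sum(Theta2[2:end,:].^2));
    return (1/m)*cost + regularization;
end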
In [8]:
function predict(Theta1Wt, Theta2Wt, data)
    dataSize = size(data, 2);
    p = zeros(dataSize, 1);
    # forward pass, prepending a bias row of ones at each layer
    h1 = sigmoid(Theta1Wt'*[ones(1, size(data, 2)); data]);
    h2 = sigmoid(Theta2Wt'*[ones(1, size(h1, 2)); h1]);
    # output index 1 corresponds to digit 0, index 2 to digit 1, and so on
    for i = 1:dataSize
        p[i,:] = indmax(h2[:,i]) - 1;
    end
    return p;
end
function accuracy(truth, prediction)
    dataSize = length(truth);
    matches = 0;
    for i = 1:dataSize
        if truth[i] == prediction[i]
            matches = matches + 1;
        end
    end
    return (matches/dataSize)*100;
end
Out[8]:
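Before training, one quick way to exercise predict and accuracy (an illustrative sketch, not in the original) is to score a small slice of the data with freshly drawn random weights, which should land near chance level (about 10%):

# hypothetical throwaway weights, drawn the same way as the training cell below
W1 = randn(inputLayerSize + 1, hiddenLayerSize);
W2 = randn(hiddenLayerSize + 1, outputLayerSize);
p0 = predict(W1, W2, X[:, 1:100]);
println("accuracy with random weights: ", accuracy(y[1:100], p0));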
In [ ]:
# including one bias neuron in input layer
# weights for the links connecting input layer to the hidden layer
Theta1 = randn(inputLayerSize+1, hiddenLayerSize); #(785x25)
# including one bias neuron in hidden layer
# weights for the links connecting hidden layer to the output layer
Theta2 = randn(hiddenLayerSize+1, outputLayerSize); #(26x10)
# learning rate
alpha = 0.1;
# number of iterations
epoch = 1000;
# cost per epoch
J = zeros(epoch,1);
# ====================================================================
# Train the neural network using feedforward-backpropagation algorithm
# ====================================================================
for i = 1:epoch
    # gradient accumulators, same shape as the weight matrices
    Delta1 = zeros(size(Theta1));
    Delta2 = zeros(size(Theta2));
    for j = 1:m # for each training example
        # ===================
        # Feedforward process
        # ===================
        # input layer: add one bias element
        x1 = [1; X[:,j]];
        # hidden layer
        s2 = Theta1'*x1;
        x2 = sigmoid(s2);
        # add one bias element
        x2 = [1; x2];
        # output layer
        s3 = Theta2'*x2;
        x3 = sigmoid(s3);
        # =======================
        # Backpropagation process
        # =======================
        # error term for the output layer
        delta3 = x3 - Y[:,j];
        # error term for the hidden layer (the bias row of Theta2 is skipped);
        # there is no delta term for the input layer
        delta2 = (Theta2[2:end,:]*delta3).*sigmoidGradient(s2);
        # accumulate the gradients
        Delta1 = Delta1 + x1*delta2';
        Delta2 = Delta2 + x2*delta3';
    end
    # gradient-descent update with regularization (bias rows are not regularized)
    reg_theta1 = (lambda/m)*Theta1;
    reg_theta1[1,:] = 0;
    Theta1 = Theta1 - ((alpha/m)*Delta1 + reg_theta1);
    reg_theta2 = (lambda/m)*Theta2;
    reg_theta2[1,:] = 0;
    Theta2 = Theta2 - ((alpha/m)*Delta2 + reg_theta2);
    # cost over the full training set for this epoch
    h1 = sigmoid(Theta1'*[ones(1, size(X, 2)); X]);
    h2 = sigmoid(Theta2'*[ones(1, size(h1, 2)); h1]);
    J[i,:] = costFunction(Y, h2);
end
In [13]:
J
Out[13]:
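To see whether the cost is actually decreasing across epochs, one option (an illustrative sketch, assuming the PyPlot package is installed) is to plot it against the epoch number:

using PyPlot;
plot(1:epoch, vec(J));
xlabel("epoch");
ylabel("regularized cost J");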
In [14]:
pred = predict(Theta1, Theta2, X);
println("train accuracy: ", accuracy(y, pred));