This assignment can be done in teams of 2
Student 1: Roan de Jong (10791930)
Student 2: Ghislaine van den Boogerd (student_id)
This notebook provides a template for your programming assignment 3. You may want to use parts of your code from the previous assignment(s) as a starting point for this assignment.
The code you hand in should follow the structure of this document. Each part of the assignment has its own cell; you are free to add more cells. Note that the structure corresponds with the structure of the actual programming assignment. Make sure you read that for the full explanation of what is expected of you.
Submission:
One way to be sure your code can be run without errors is to quit IPython completely, restart it, and run all cells again (you can do this from the menu bar above: Cell > Run All). This way you make sure that no old definitions of functions or values of variables are left over (which your program might otherwise still be using).
If you have any questions, ask your teaching assistant. We are here for you.
In [266]:
from __future__ import division
import numpy as np
import pandas as pd
import csv
import math
class logReg:
    df = None
    input_vars = None
    classifying_vars = None
    thetas = None
    alpha = 0.0
    reg_lambda = 0.0

    def __init__(self, fileName, alpha, reg_lambda):
        self.df = pd.read_csv(fileName, header=None)
        length_col = len(self.df[self.df.columns[-1]])
        self.classifying_vars = self.df[self.df.columns[-1]].as_matrix()\
            .reshape(length_col, 1)
        x = self.df[self.df.columns[0:-1]].as_matrix()
        # this is the row for x_0 (the bias feature)
        temp_arr = np.ones((1, len(x.T[0])))
        for column in x.T:
            # scale every feature to the range [0, 1]
            if column.max(0) > 0:
                column = column / column.max(0)
            temp_arr = np.vstack((temp_arr, column))
        self.input_vars = temp_arr.T
        self.thetas = np.full((len(self.input_vars[0]), 1), 0.5)
        self.alpha = alpha
        self.reg_lambda = reg_lambda

    # The hypothesis h(x) = sigmoid(X . theta) for every training example
    @property
    def gradient(self):
        theta_x = np.dot(self.input_vars, self.thetas)
        # An ugly way to make a np.array
        h_x = np.array([0.0])
        for example in theta_x:
            h_x = np.vstack((h_x, 1 / (1 + math.e**(-example))))
        # We added this range to get rid of the useless 1st index: 0.0
        return h_x[1:]

    # Update the thetas as described in the lecture notes
    def update(self, classifier):
        # work on a copy so the original labels are not overwritten
        output_vars = self.classifying_vars.copy()
        np.place(output_vars, output_vars != classifier, [0])
        np.place(output_vars, output_vars == classifier, [1])
        x = self.gradient - output_vars
        y = np.dot(self.input_vars.T, x)
        m = len(self.classifying_vars)
        # gradient of the regularised cost: X'(h - y) + (lambda / m) * theta
        self.thetas = self.thetas - self.alpha * (y + (self.reg_lambda / m) * self.thetas)
        return self.thetas

    # calculate the cost
    def cost(self, classifier):
        h_x = self.gradient
        cost = 0.0
        for training_example in zip(h_x, self.classifying_vars):
            if training_example[1] == classifier:
                cost = cost + math.log(training_example[0])
            else:
                cost = cost + math.log(1 - training_example[0])
        m = len(self.classifying_vars)
        cost = -(1 / m) * cost + (self.reg_lambda / (2 * m)) * self.thetas.T.dot(self.thetas)
        return cost

    # train the model for a given number of iterations
    def train(self, classifier, iterations):
        for i in range(0, iterations):
            self.update(classifier)
            print(self.cost(classifier))


if __name__ == '__main__':
    trainer = logReg('digits123.csv', 0.00002, 200)
    trainer.train(3, 100)
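As a side note: the per-example loop in gradient can be replaced by a single vectorised expression. The snippet below is only a sketch with our own toy arrays and helper name (sigmoid), not part of the class above:

import numpy as np

def sigmoid(z):
    # element-wise logistic function, works on whole arrays
    return 1.0 / (1.0 + np.exp(-z))

X = np.array([[1.0, 0.2], [1.0, 0.8]])    # toy design matrix with a bias column
thetas = np.full((2, 1), 0.5)
h_x = sigmoid(np.dot(X, thetas))          # shape (2, 1), like the loop-built h_x
print(h_x)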
b) Two small datasets
In [ ]:
Discussion:
[Your discussion goes here]
a) Forward Propagation
In [267]:
from __future__ import division
import numpy as np
import pandas as pd
import csv
import math
class neuralNet:
    class logReg:
        input_vars = None
        output_vars = None
        thetas = None
        alpha = 0.0
        reg_lambda = 0.0

        def __init__(self, thetas):
            self.thetas = thetas

        # The activation of this unit: sigmoid(X . theta), stored in output_vars
        @property
        def gradient(self):
            theta_x = np.dot(self.input_vars, self.thetas)
            # An ugly way to make a np.array
            h_x = np.array([0.0])
            for example in theta_x:
                h_x = np.vstack((h_x, 1 / (1 + math.e**(-example))))
            # We added this range to get rid of the useless 1st index: 0.0
            self.output_vars = h_x[1:]

        # Update the thetas as described in the lecture notes
        # (not used during forward propagation)
        def update(self, classifier):
            y_vars = self.output_vars.copy()
            np.place(y_vars, y_vars != classifier, [0])
            np.place(y_vars, y_vars == classifier, [1])
            self.gradient
            x = self.output_vars - y_vars
            grad = np.dot(self.input_vars.T, x)
            self.thetas = self.thetas - self.alpha * grad
            return self.thetas

    df = None
    input_vars = None
    classifying_vars = None
    alpha = 0.0
    architecture = None
    activations = None

    def __init__(self, fileName, alpha, architecture):
        self.read_data(fileName, alpha)
        self.create_architecture(architecture)

    def read_data(self, fileName, alpha):
        #self.df = pd.read_csv(fileName, header=None)
        #length_col = len(self.df[self.df.columns[-1]])
        #self.classifying_vars = self.df[self.df.columns[-1]].as_matrix()\
        #    .reshape(length_col, 1)
        #x = self.df[self.df.columns[0:-1]].as_matrix()
        # this is the column for x_0
        x = np.array([[-5.0]])
        #temp_arr = np.ones((1, len(x.T[0])))
        #for column in x.T:
        #    if column.max(0) > 0:
        #        column = column / column.max(0)
        #    temp_arr = np.vstack((temp_arr, column))
        #self.input_vars = temp_arr.T
        self.input_vars = x
        self.alpha = alpha

    def create_architecture(self, nn_architecture):
        architecture = []
        input_layer_size = len(self.input_vars[0])
        initial_layer = []
        for node in range(0, nn_architecture[0]):
            thetas = np.array([[0.2]])
            #thetas = np.random.rand(input_layer_size, 1)
            agent = self.logReg(thetas)
            agent.input_vars = self.input_vars
            initial_layer.append(agent)
        architecture.append(initial_layer)
        for layer_size in nn_architecture[1:]:
            layer = []
            for node in range(0, layer_size):
                thetas = np.array([[0.1]])
                #thetas = np.random.rand(len(architecture[-1]), 1)
                agent = self.logReg(thetas)
                layer.append(agent)
            architecture.append(layer)
        self.architecture = architecture

    def forward_prop(self):
        activations = []
        for layer in self.architecture:
            if activations:
                activation_layer = np.zeros((len(activations[0]), 1))
            else:
                activation_layer = np.zeros((len(layer[0].input_vars), 1))
            for node in layer:
                if activations:
                    node.input_vars = activations[-1]
                # evaluating the property stores the unit's activation in output_vars
                node.gradient
                activation_layer = np.hstack((activation_layer, node.output_vars))
            activations.append(activation_layer[:, 1:])
        print(activations)
        self.activations = activations


if __name__ == "__main__":
    nn = neuralNet('ez_test.csv', 0.01, [1, 1])
    nn.forward_prop()
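For the hard-coded toy input the forward pass can be checked by hand. The following is only a standalone sketch with our own names (sigmoid, a1, a2); the values 0.2 and 0.1 are the thetas set in create_architecture above:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = -5.0
a1 = sigmoid(0.2 * x)    # first unit:  sigmoid(-1.0)   ~ 0.269
a2 = sigmoid(0.1 * a1)   # second unit: sigmoid(0.0269) ~ 0.507
print(a1)
print(a2)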
b) Backpropagation on two logistic units
In [268]:
from __future__ import division
import numpy as np
import pandas as pd
import csv
import math
class neuralNet:
    class logReg:
        input_vars = None
        output_vars = None
        thetas = None
        alpha = 0.0
        reg_lambda = 0.0

        def __init__(self, thetas):
            self.thetas = thetas

        # The activation of this unit: sigmoid(X . theta), stored in output_vars
        @property
        def gradient(self):
            theta_x = np.dot(self.input_vars, self.thetas)
            # An ugly way to make a np.array
            h_x = np.array([0.0])
            for example in theta_x:
                h_x = np.vstack((h_x, 1 / (1 + math.e**(-example))))
            # We added this range to get rid of the useless 1st index: 0.0
            self.output_vars = h_x[1:]

        # Update the thetas as described in the lecture notes
        # (not used here; the network updates the units itself)
        def update(self, classifier):
            y_vars = self.output_vars.copy()
            np.place(y_vars, y_vars != classifier, [0])
            np.place(y_vars, y_vars == classifier, [1])
            self.gradient
            x = self.output_vars - y_vars
            grad = np.dot(self.input_vars.T, x)
            self.thetas = self.thetas - self.alpha * grad
            return self.thetas

    df = None
    input_vars = None
    classifying_vars = None
    alpha = 0.0
    architecture = None
    activations = None

    def __init__(self, alpha, architecture):
        self.read_data(alpha)
        self.create_architecture(architecture)

    def read_data(self, alpha):
        # a single hard-coded training example
        x = np.array([[-5.0]])
        self.input_vars = x
        self.classifying_vars = np.array([[1]])
        self.alpha = alpha

    def create_architecture(self, nn_architecture):
        architecture = []
        input_layer_size = len(self.input_vars[0])
        initial_layer = []
        for node in range(0, nn_architecture[0]):
            thetas = np.array([[0.5]])
            agent = self.logReg(thetas)
            agent.input_vars = self.input_vars
            initial_layer.append(agent)
        architecture.append(initial_layer)
        for layer_size in nn_architecture[1:]:
            layer = []
            for node in range(0, layer_size):
                thetas = np.array([[0.5]])
                agent = self.logReg(thetas)
                layer.append(agent)
            architecture.append(layer)
        self.architecture = architecture

    def forward_prop(self):
        activations = []
        for layer in self.architecture:
            if activations:
                activation_layer = np.zeros((len(activations[0]), 1))
            else:
                activation_layer = np.zeros((len(layer[0].input_vars), 1))
            for node in layer:
                if activations:
                    node.input_vars = activations[-1]
                node.gradient
                activation_layer = np.hstack((activation_layer, node.output_vars))
            activations.append(activation_layer[:, 1:])
        self.activations = activations

    def back_prop(self, classifier):
        errors = []
        prev_layer = None
        for layer, activation in zip(reversed(self.architecture), reversed(self.activations)):
            if errors:
                # the error of a hidden layer uses the thetas of the layer above it
                # (the layer handled in the previous iteration)
                thetas = np.zeros((len(prev_layer[0].thetas), 1))
                for node in prev_layer:
                    thetas = np.hstack((thetas, node.thetas))
                thetas = thetas[:, 1:]
                delta = activation * (1 - activation)
                delta = delta * np.dot(errors[-1], thetas.T)
                errors.append(delta)
            else:
                # error of the output layer: a - y
                y_vars = self.classifying_vars.copy()
                np.place(y_vars, y_vars != classifier, [0])
                np.place(y_vars, y_vars == classifier, [1])
                delta = activation - y_vars
                errors.append(delta)
            prev_layer = layer
        # errors runs from the output layer backwards, matching the reversed
        # architecture and activations used in update
        self.update(errors)

    def update(self, errors):
        # the inputs of each layer are the activations of the layer below it
        # (the raw input for the first layer)
        layer_inputs = [self.input_vars] + self.activations[:-1]
        for layer, inputs, delta in zip(reversed(self.architecture),
                                        reversed(layer_inputs), errors):
            for i, node in enumerate(layer):
                node.thetas = node.thetas - self.alpha * np.dot(inputs.T, delta[:, i:i + 1])


if __name__ == "__main__":
    nn = neuralNet(0.001, [1, 1])
    for i in range(0, 100):
        nn.forward_prop()
        nn.back_prop(1)
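For this two-unit network one update step can also be written out by hand. This is only a sketch with our own variable names, assuming the cross-entropy error from part 1, so the output error is simply a2 - y:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x, y, alpha = -5.0, 1.0, 0.001
theta1 = theta2 = 0.5

a1 = sigmoid(theta1 * x)                    # ~0.076
a2 = sigmoid(theta2 * a1)                   # ~0.509
delta2 = a2 - y                             # output error, ~-0.491
delta1 = a1 * (1 - a1) * theta2 * delta2    # hidden error, ~-0.017
theta2 -= alpha * a1 * delta2               # gradient w.r.t. theta2 is a1 * delta2
theta1 -= alpha * x * delta1                # gradient w.r.t. theta1 is x * delta1
print(theta1)
print(theta2)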
c) Complete backpropagation on handwritten digit recognition
In [269]:
from __future__ import division
import numpy as np
import pandas as pd
import csv
import math
class neuralNet:
    class logReg:
        input_vars = None
        output_vars = None
        thetas = None
        reg_lambda = 0.0

        def __init__(self, thetas):
            self.thetas = thetas

        # The activation of this unit: sigmoid(X . theta), stored in output_vars
        @property
        def gradient(self):
            theta_x = np.dot(self.input_vars, self.thetas)
            # An ugly way to make a np.array
            h_x = np.array([0.0])
            for example in theta_x:
                h_x = np.vstack((h_x, 1 / (1 + math.e**(-example))))
            # We added this range to get rid of the useless 1st index: 0.0
            self.output_vars = h_x[1:]

    df = None
    input_vars = None
    classifying_vars = None
    alpha = 0.0
    architecture = None
    activations = None

    def __init__(self, fileName, alpha, architecture):
        self.read_data(fileName, alpha)
        self.create_architecture(architecture)

    def read_data(self, fileName, alpha):
        self.df = pd.read_csv(fileName, header=None)
        length_col = len(self.df[self.df.columns[-1]])
        self.classifying_vars = self.df[self.df.columns[-1]].as_matrix()\
            .reshape(length_col, 1)
        x = self.df[self.df.columns[0:-1]].as_matrix()
        # this is the row for x_0
        temp_arr = np.ones((1, len(x.T[0])))
        for column in x.T:
            # scale every feature to the range [0, 1]
            if column.max(0) > 0:
                column = column / column.max(0)
            temp_arr = np.vstack((temp_arr, column))
        self.input_vars = temp_arr[1:].T
        self.alpha = alpha

    def create_architecture(self, nn_architecture):
        architecture = []
        input_layer_size = len(self.input_vars[0])
        initial_layer = []
        for node in range(0, nn_architecture[0]):
            thetas = np.random.rand(input_layer_size, 1)
            agent = self.logReg(thetas)
            agent.input_vars = self.input_vars
            initial_layer.append(agent)
        architecture.append(initial_layer)
        for layer_size in nn_architecture[1:]:
            layer = []
            for node in range(0, layer_size):
                thetas = np.random.rand(len(architecture[-1]), 1)
                agent = self.logReg(thetas)
                layer.append(agent)
            architecture.append(layer)
        self.architecture = architecture

    def forward_prop(self):
        activations = []
        for layer in self.architecture:
            if activations:
                activation_layer = np.zeros((len(activations[0]), 1))
            else:
                activation_layer = np.zeros((len(layer[0].input_vars), 1))
            for node in layer:
                if activations:
                    node.input_vars = activations[-1]
                node.gradient
                activation_layer = np.hstack((activation_layer, node.output_vars))
            activations.append(activation_layer[:, 1:])
        self.activations = activations

    def back_prop(self, classifier):
        errors = []
        for backprop in zip(reversed(self.architecture), reversed(self.activations)):
            if errors:
                thetas = np.zeros((len(backprop[0][0].thetas), 1))
                for node in backprop[0]:
                    thetas = np.hstack((thetas, node.thetas))
                thetas = thetas[:, 1:]
                # I almost figured out how to perform the operations using linear algebra.
                # However, my implementation does not work when the dimensions of the inputs
                # of a layer differ from its outputs. According to documentation I found,
                # this should work:
                # http://briandolhansky.com/blog/2014/10/30/artificial-neural-networks-matrix-form-part-5
                print(thetas.shape)
                print(errors[-1].shape)
                print(backprop[1].shape)
                delta = np.multiply(backprop[1], (1 - backprop[1]))
                delta = np.multiply(delta, np.dot(thetas, errors[-1].T).T)
                errors.append(delta)
            else:
                y_vars = self.classifying_vars.copy()
                np.place(y_vars, y_vars != classifier, [0])
                np.place(y_vars, y_vars == classifier, [1])
                delta = (backprop[1] - y_vars)
                errors.append(delta)


if __name__ == "__main__":
    nn = neuralNet('digits123.csv', 0.01, [9, 9, 9])
    nn.forward_prop()
    nn.back_prop(1)
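The dimension mismatch described in the comment comes from collecting the current layer's thetas in the delta recursion; the standard matrix form uses the thetas of the layer above (closer to the output), so the shapes always line up. A minimal standalone sketch of that recursion, with invented example sizes and our own names (a, delta_next, Theta_next):

import numpy as np

m, n_here, n_next = 5, 9, 3                   # example sizes, not tied to the dataset
a = np.random.rand(m, n_here)                 # activations of the current layer
delta_next = np.random.rand(m, n_next)        # error of the layer above
Theta_next = np.random.rand(n_here, n_next)   # weights of the layer above

# the current layer's error uses the weights of the layer above,
# giving a result of shape (m, n_here)
delta_here = a * (1 - a) * np.dot(delta_next, Theta_next.T)
print(delta_here.shape)   # (5, 9)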
Discussion:
[Your discussion goes here]
In [ ]: