In [1]:
import numpy as np

def softmax(z):
    """
    z: an array of inputs, one row per sample
    return: the softmax of each row (the values in each row sum to 1)
    """
    return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(activations, expectations):
    # cross-entropy cost, summed over all samples and output neurons
    return np.sum(-expectations * np.log(activations))
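As a quick sanity check (added here, not part of the original notebook), the helpers can be exercised on a small array: each softmax row should sum to 1 and sigmoid should map 0 to 0.5.

z_demo = np.array([[1.0, 2.0, 3.0],
                   [0.0, 0.0, 0.0]])
print(softmax(z_demo))               # each row is a probability distribution
print(softmax(z_demo).sum(axis=1))   # -> [1. 1.]
print(sigmoid(np.array([0.0])))      # -> [0.5]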
In [2]:
# prepare the training data
# training input: 4-bit binary numbers 0001 through 0111, one per row
input = np.array([[0, 0, 0, 1],
                  [0, 0, 1, 0],
                  [0, 0, 1, 1],
                  [0, 1, 0, 0],
                  [0, 1, 0, 1],
                  [0, 1, 1, 0],
                  [0, 1, 1, 1]])
# training output expectation: one-hot labels, [0, 1] when the last input
# bit is 1 (odd number) and [1, 0] when it is 0 (even number)
output = np.array([[0, 1],
                   [1, 0],
                   [0, 1],
                   [1, 0],
                   [0, 1],
                   [1, 0],
                   [0, 1]])
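A small check (added, not in the original) confirms the shapes and the labelling rule, namely that the second label column is exactly the last input bit:

print(input.shape, output.shape)                   # (7, 4) (7, 2)
print(np.array_equal(output[:, 1], input[:, 3]))   # True: label follows the last bit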
In [3]:
# construct the network
# input layer: 4 inputs
# hidden layer: 5 neurons with sigmoid as the activation function
#   * weight: 4x5 matrix
#   * bias:   1x5 matrix
# output layer: 2 neurons with softmax as the activation function
#   * weight: 5x2 matrix
#   * bias:   1x2 matrix

# initialize the weights/biases of the hidden layer (2nd layer)
w2 = np.random.rand(4, 5)
b2 = np.random.rand(1, 5)
# initialize the weights/biases of the output layer (3rd layer)
w3 = np.random.rand(5, 2)
b3 = np.random.rand(1, 2)
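Before training, a single untrained forward pass (an added sanity check, not in the original notebook) confirms that the layer shapes match the architecture described above:

a2_demo = sigmoid(np.dot(input, w2) + b2)    # hidden activations
a3_demo = softmax(np.dot(a2_demo, w3) + b3)  # output activations
print(a2_demo.shape, a3_demo.shape)          # (7, 5) (7, 2)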
In [4]:
num_epochs = 10000
eta = 0.1   # learning rate
x = []      # epoch indices for plotting
y = []      # cost values for plotting
for i in range(num_epochs):
    # feed forward
    z2 = np.dot(input, w2) + b2
    a2 = sigmoid(z2)             # 7x5
    z3 = np.dot(a2, w3) + b3
    a3 = softmax(z3)             # 7x2
    if i % 1000 == 0:
        print("Prediction", a3)
        print("W2", w2)
        print("B2", b2)
        print("W3", w3)
        print("B3", b3)
    x.append(i)
    y.append(cost(a3, output))
    # back-propagation
    # output-layer error: for softmax + cross-entropy, dC/dz3 = a3 - output (7x2)
    delta_l3 = a3 - output
    deriv_w3 = np.dot(a2.T, delta_l3)
    deriv_b3 = np.sum(delta_l3, axis=0, keepdims=True)
    # hidden-layer error must use the current w3, i.e. be computed
    # before w3 is updated (7x5)
    delta_l2 = np.dot(delta_l3, w3.T) * (a2 * (1 - a2))
    deriv_w2 = np.dot(input.T, delta_l2)
    deriv_b2 = np.sum(delta_l2, axis=0, keepdims=True)
    # gradient-descent updates
    w3 -= eta * deriv_w3
    b3 -= eta * deriv_b3
    w2 -= eta * deriv_w2
    b2 -= eta * deriv_b2
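The step delta_l3 = a3 - output relies on the standard softmax-plus-cross-entropy gradient dC/dz3 = a3 - output. A finite-difference check (a sketch added here, not part of the original notebook; the helper forward_cost is hypothetical) can confirm the analytic gradient for w3 after training:

def forward_cost(w3_probe):
    # recompute the cost with a probed value of w3, everything else fixed
    a2_ = sigmoid(np.dot(input, w2) + b2)
    a3_ = softmax(np.dot(a2_, w3_probe) + b3)
    return cost(a3_, output)

a2_chk = sigmoid(np.dot(input, w2) + b2)
a3_chk = softmax(np.dot(a2_chk, w3) + b3)
analytic = np.dot(a2_chk.T, a3_chk - output)   # same formula as deriv_w3 above

eps = 1e-5
numeric = np.zeros_like(w3)
for r in range(w3.shape[0]):
    for c in range(w3.shape[1]):
        w3_plus, w3_minus = w3.copy(), w3.copy()
        w3_plus[r, c] += eps
        w3_minus[r, c] -= eps
        numeric[r, c] = (forward_cost(w3_plus) - forward_cost(w3_minus)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be tiny, e.g. below 1e-6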
In [5]:
import matplotlib.pyplot as plt
plt.plot(x, y)
plt.xlabel("Epoch")
plt.ylabel("Cost")
plt.show()