In [1]:
import numpy as np
In [2]:
def d_tanh(y):
    """Return the derivative of tanh, written in terms of its output.

    ``y`` is expected to already be ``tanh(x)``; the derivative with respect
    to ``x`` is then ``1 - y**2``.
    """
    squared = y * y
    return 1 - squared
def train(patterns, weights, changes, iterations, N, M):
    """Train the network by running backpropagation on every pattern.

    Parameters:
        patterns: iterable of ``(inputs, targets)`` pairs.
        weights: pair ``(weights_in_to_hidden, weights_hidden_to_out)``.
        changes: pair ``(changes_in_to_hidden, changes_hidden_to_out)``
            holding the previous weight changes (used for momentum).
        iterations: number of passes over ``patterns``.
        N: learning rate.
        M: momentum factor.

    Returns:
        The ``(weights, changes)`` pair produced by the last update.

    The summed error of a pass is printed on every 100th pass.
    """
    # Materialise once: a one-shot iterable (for example a Python 3 ``zip``
    # object) would be exhausted after the first pass and every later pass
    # would silently train on nothing.
    patterns = list(patterns)
    for i in range(iterations):
        error = 0.0
        for inputs, targets in patterns:
            activations = feed_forward(inputs, weights)
            error_bp, weights, changes = backprop(
                targets, activations, weights, changes,
                learning_rate=N, momentum=M)
            error = error + error_bp
        if i % 100 == 0:
            print('error %-.5f' % error)
    return weights, changes
def test(patterns, weights):
    """Print the network output for each pattern.

    Each line shows the pattern's inputs without their last element (the
    bias term appended to every input row) followed by the output-layer
    activations returned by ``feed_forward``.
    """
    for p in patterns:
        # Parenthesised single-argument print parses on both Python 2 and
        # Python 3, matching the form already used in ``train``.
        print('{}->{}'.format(p[0][:-1], feed_forward(p[0], weights)[0]))
def compute_activations(activations, weights):
    """Propagate one layer's activations to the next layer.

    The incoming ``activations`` are combined with ``weights`` through a dot
    product and the result is squashed with tanh.
    """
    net_input = np.dot(activations, weights)
    squashed = np.tanh(net_input)
    # A full slice of the result is returned, exactly as before.
    return squashed[:]
def feed_forward(input_, weights):
    """Run one input vector through the single-hidden-layer network.

    ``weights`` is the pair ``(input->hidden, hidden->output)``.  The result
    is the tuple ``(output activations, hidden activations, input_)``.
    """
    w_in, w_out = weights
    hidden = compute_activations(input_, w_in)
    return compute_activations(hidden, w_out), hidden, input_
def backprop(targets, activations, weights, changes, learning_rate, momentum):
    """Backpropagate the error of one pattern and update both weight layers.

    Parameters:
        targets: desired output-layer values for the pattern.
        activations: tuple ``(output, hidden, input)`` activations as
            returned by ``feed_forward``.
        weights: pair ``(weights_in_to_hidden, weights_hidden_to_out)``.
        changes: pair ``(changes_in_to_hidden, changes_hidden_to_out)`` with
            the weight changes of the previous step (momentum term).
        learning_rate: factor applied to the freshly computed changes.
        momentum: factor applied to the previous changes.

    Returns:
        ``(error, (new_weights_in, new_weights_out),
        (new_changes_in, new_changes_out))`` where ``error`` is half the
        summed squared output error.
    """
    # expand arguments for readability
    activations_out, activations_hidden, activations_in = activations
    weights_in_to_hidden, weights_hidden_to_out = weights
    changes_in_to_hidden, changes_hidden_to_out = changes

    # compute deltas (for error at each unit)
    # sign convention: target minus output, e.g. .2 = 1 - .8 (NOT -.2 = .8 - 1)
    error = targets - activations_out
    output_deltas = d_tanh(activations_out) * error
    # Each hidden unit receives the output deltas weighted by ITS OWN outgoing
    # weights (row j of the hidden->output matrix).  Summing the whole
    # broadcast product would hand every hidden unit the same scalar.
    hidden_error = np.dot(weights_hidden_to_out, output_deltas)
    hidden_deltas = d_tanh(activations_hidden) * hidden_error

    # compute changes: entry [i, j] is (source activation i) * (target delta j).
    # np.outer yields the (n_source, n_target) matrix directly, so the layer
    # sizes no longer have to come from module-level globals.
    new_changes_out = np.outer(activations_hidden, output_deltas)
    new_changes_in = np.outer(activations_in, hidden_deltas)

    # update weights
    new_weights_out = (weights_hidden_to_out
                       + learning_rate * new_changes_out
                       + momentum * changes_hidden_to_out)
    new_weights_in = (weights_in_to_hidden
                      + learning_rate * new_changes_in
                      + momentum * changes_in_to_hidden)

    # compute error
    error = np.sum(0.5 * (error ** 2))
    return error, (new_weights_in, new_weights_out), (new_changes_in, new_changes_out)
# numbers of input units (+bias), hidden units, and output units
n_input = 2 + 1
n_hidden = 4
n_output = 1

# initialize weights with small random values
weights_in_to_hidden = np.random.uniform(-.2, .2, (n_input, n_hidden))
weights_hidden_to_out = np.random.uniform(-.2, .2, (n_hidden, n_output))
weights = weights_in_to_hidden, weights_hidden_to_out

# initialize changes (for momentum)
changes_in_to_hidden = np.zeros((n_input, n_hidden))
changes_hidden_to_out = np.zeros((n_hidden, n_output))
changes = changes_in_to_hidden, changes_hidden_to_out

# XOR
inputs = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])  # with bias
targets = np.array([[0], [1], [1], [0]])
# Keep the pairs in a list: they are iterated once per training pass and
# again when testing, which a one-shot zip iterator (Python 3) cannot do.
patterns = list(zip(inputs, targets))

# train on our data
weights, changes = train(patterns, weights, changes, 1000, 0.3, 0.1)

# test
test(patterns, weights)
In [3]:
# Scratch check: draw an (n_input, n_hidden) matrix of random values between -0.2 and 0.2.
np.random.uniform(-.2,.2,(n_input,n_hidden))
Out[3]:
In [4]:
# Scratch check: an all-zero (n_input, n_hidden) matrix.
np.zeros((n_input,n_hidden))
Out[4]:
In [5]:
# Scratch check: pair each row of `inputs` with the matching row of `targets`.
zip(inputs,targets)
Out[5]:
In [6]:
# Scratch check: the Kronecker product of a length-3 and a length-4 vector, reshaped to
# (3, 4), is their outer product (entry [i, j] equals a[i] * b[j]).
np.kron(np.array([1,2,3]),np.array([2,3,4,5])).reshape((3,4))
Out[6]:
In [ ]: