In [1]:
import numpy as np

In [2]:
def d_tanh(y):
    "derivative helper for tanh (uses y = tanh(x))"
    return 1 - y**2

def train( patterns, weights, changes, iterations, N, M):
    """Train a neural network with these parameters:
    N: learning rate
    M: momentum factor
    weights_in_to_hidden, weights_hidden_to_out = weights
    changes_in_to_hidden, changes_hidden_to_out = changes
    """
    for i in range(iterations):
        error = 0.0
        for p in patterns:
            inputs,targets = p
            activations = feed_forward(inputs,weights)
            error_bp, weights, changes = backprop( targets, activations, weights, changes, learning_rate=N, momentum=M)
            error = error + error_bp
        if i % 100 == 0:
            print('error %-.5f' % error)
    return weights,changes

def test(patterns,weights):
    for p in patterns:
        print '{}->{}'.format(p[0][:-1], feed_forward(p[0],weights)[0])

def compute_activations(activations,weights):
    """Compute activations from one layer to the next"""
    weights = np.tanh(np.dot(activations,weights))
    return weights[:]

def feed_forward(input_,weights):
    """Feed input through a mlp with one hidden layer"""
    weights_in_to_hidden, weights_hidden_to_out = weights
    hidden_activations = compute_activations(input_,weights_in_to_hidden)
    output_activations = compute_activations(hidden_activations,weights_hidden_to_out)
    return output_activations, hidden_activations, input_


def backprop(targets,activations,weights,changes,learning_rate,momentum):
    """Backpropagate errors"""
    # expand arguments for readability
    activations_out, activations_hidden, activations_in = activations
    weights_in_to_hidden, weights_hidden_to_out = weights
    changes_in_to_hidden, changes_hidden_to_out = changes

    # compute deltas (for error at each unit)
    error = targets - activations_out ###### .2 = 1 - .8 (NOT -.2 = .8 - 1)
    output_deltas = d_tanh(activations_out) * error
    hidden_deltas = d_tanh(activations_hidden) * np.sum(output_deltas*weights_hidden_to_out)
    
    # compute changes
    new_changes_out = np.kron(activations_hidden,output_deltas).reshape((n_hidden,n_output))
    new_changes_in = np.kron(activations_in,hidden_deltas).reshape((n_input,n_hidden))
    
    # update weights
    new_weights_out = weights_hidden_to_out + learning_rate * new_changes_out + momentum * changes_hidden_to_out
    new_weights_in = weights_in_to_hidden + learning_rate * new_changes_in + momentum * changes_in_to_hidden

    #print "targets:\n {}".format(targets)
    #print "error:\n {}".format(error)
    #print "dtan_h:\n {}".format(d_tanh(activations_out))
    #print "activations_out:\n {}".format(activations_out)
    #print "activations_hidden:\n {}".format(activations_hidden)
    #print
    #print "output_deltas:\n {}".format(output_deltas)
    #print "hidden_deltas:\n {}".format(hidden_deltas)
    #print
    #print "weights_hidden_to_out:\n {}".format(weights_hidden_to_out)
    #print "new_changes_out:\n {}".format(new_changes_out)
    #print "new_weights_out:\n {}".format(new_weights_out)
    #print 
    
    # compute error
    error = np.sum(0.5 * (error ** 2))
    
    return error, (new_weights_in,new_weights_out), (new_changes_in,new_changes_out)

# numbers of input units(+bias), hidden units, and output units
n_input = 2 + 1
n_hidden = 4
n_output = 1

# initialize weights
weights_in_to_hidden = np.random.uniform(-.2,.2,(n_input,n_hidden))
weights_hidden_to_out = np.random.uniform(-.2,.2,(n_hidden,n_output))
weights = weights_in_to_hidden, weights_hidden_to_out

#initialize changes (for momentum)
changes_in_to_hidden = np.zeros((n_input,n_hidden))
changes_hidden_to_out = np.zeros((n_hidden,n_output))
changes = changes_in_to_hidden, changes_hidden_to_out

# XOR
inputs = np.array([[0,0,1],[0,1,1],[1,0,1],[1,1,1]]) # with bias
targets = np.array([[0],[1],[1],[0]])
patterns = zip(inputs,targets)

# train on our data
weights,changes = train(patterns,weights,changes,1000,0.3,0.1)

# test
test(patterns,weights)


error 1.03773
error 0.48469
error 0.38948
error 0.00371
error 0.00129
error 0.00073
error 0.00087
error 0.00378
error 0.00059
error 0.00035
[0 0]->[ 0.02954008]
[0 1]->[ 0.98532427]
[1 0]->[ 0.99036843]
[1 1]->[-0.00027823]

In [3]:
np.random.uniform(-.2,.2,(n_input,n_hidden))


Out[3]:
array([[-0.06668141,  0.12998216,  0.19498572, -0.15074921],
       [ 0.09152674,  0.14717915, -0.14520145, -0.07123759],
       [-0.09505889, -0.03454354,  0.024254  ,  0.01992681]])

In [4]:
np.zeros((n_input,n_hidden))


Out[4]:
array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [5]:
zip(inputs,targets)


Out[5]:
[(array([0, 0, 1]), array([0])),
 (array([0, 1, 1]), array([1])),
 (array([1, 0, 1]), array([1])),
 (array([1, 1, 1]), array([0]))]

In [6]:
np.kron(np.array([1,2,3]),np.array([2,3,4,5])).reshape((3,4))


Out[6]:
array([[ 2,  3,  4,  5],
       [ 4,  6,  8, 10],
       [ 6,  9, 12, 15]])

In [ ]: