In [1]:
import copy, numpy as np
np.random.seed(0)

def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^-x) to ``x``.

    ``x`` may be a scalar or a NumPy array; arrays are transformed
    element-wise because ``np.exp`` is element-wise.
    """
    return 1/(1+np.exp(-x))

def sigmoid_output_to_derivative(output):
    """Return ``output * (1 - output)``.

    When ``output`` is a value already produced by ``sigmoid``, this is the
    slope of the sigmoid at that point, so the input never has to be kept.
    """
    complement = 1-output
    return output*complement

In [2]:
# Lookup table: integer -> its binary_dim-bit representation as a uint8 array,
# most significant bit first.
int2binary = {}
binary_dim = 8

largest_number = pow(2,binary_dim)

# Build the table with shifts and masks so it always has exactly binary_dim
# columns. np.unpackbits on a uint8 column produces 8 columns no matter what
# binary_dim is, which is only right when binary_dim == 8; for that value the
# array built here is identical to the unpackbits result.
binary = (
    (np.arange(largest_number)[:, np.newaxis] >> np.arange(binary_dim - 1, -1, -1)) & 1
).astype(np.uint8)
for i in range(largest_number):
    int2binary[i] = binary[i]

# Hyperparameters.
alpha = 0.1       # step size applied to the accumulated weight updates
input_dim = 2     # one bit from each of the two addends per time step
hidden_dim = 16   # width of the recurrent hidden layer
output_dim = 1    # one predicted sum bit per time step

# Weights drawn uniformly from [-1, 1). The order of these three draws is kept
# as-is so that a seeded run produces the same initial weights.
synapse_0 = 2*np.random.random((input_dim,hidden_dim)) - 1    # input  -> hidden
synapse_1 = 2*np.random.random((hidden_dim,output_dim)) - 1   # hidden -> output
synapse_h = 2*np.random.random((hidden_dim,hidden_dim)) - 1   # hidden -> hidden (next step)

# Accumulators for the weight updates gathered while backpropagating through
# one whole sequence.
synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)

In [3]:
# Train on randomly generated addition problems, one problem per iteration.
for j in range(10000):
    # Draw two addends below largest_number // 2 so their sum is still a key
    # of int2binary. Floor division keeps the bound an integer, which is what
    # np.random.randint documents; it draws the same values as the float bound.
    a_int = np.random.randint(largest_number // 2)
    a = int2binary[a_int]
    b_int = np.random.randint(largest_number // 2)
    b = int2binary[b_int]

    # Target: the true sum and its bit pattern.
    c_int = a_int + b_int
    c = int2binary[c_int]

    # d collects the network's rounded bit predictions for this problem.
    d = np.zeros_like(c)
    overallError = 0

    layer_2_deltas = list()
    layer_1_values = list()
    # Hidden state before the first time step is all zeros.
    layer_1_values.append(np.zeros(hidden_dim))

    # Forward pass: feed the bits from the last index (least significant)
    # towards index 0 (most significant).
    for position in range(binary_dim):
        bit_index = binary_dim - position - 1

        X = np.array([[a[bit_index],b[bit_index]]])
        y = np.array([[c[bit_index]]]).T

        # Hidden layer sees the current input plus the previous hidden state.
        layer_1 = sigmoid(np.dot(X,synapse_0) + np.dot(layer_1_values[-1],synapse_h))
        layer_2 = sigmoid(np.dot(layer_1,synapse_1))

        # Output error and its delta; deltas are stored for the backward pass.
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error)*sigmoid_output_to_derivative(layer_2))
        # layer_2_error[0] is a one-element row, so overallError becomes a
        # one-element array (that is why it prints with brackets).
        overallError += np.abs(layer_2_error[0])

        # Record the rounded prediction for this bit.
        d[bit_index] = np.round(layer_2[0][0])
        # Keep a copy of the hidden state for the next step and for backprop.
        layer_1_values.append(copy.deepcopy(layer_1))

    # Backward pass through time: start at the last forward step (index 0 of
    # the bit arrays) and walk back to the first.
    future_layer_1_delta = np.zeros(hidden_dim)
    for position in range(binary_dim):
        X = np.array([[a[position],b[position]]])
        layer_1 = layer_1_values[-position-1]
        prev_layer_1 = layer_1_values[-position-2]

        layer_2_delta = layer_2_deltas[-position-1]
        # Hidden delta combines the error flowing back from the following time
        # step (through synapse_h) and from this step's output (through
        # synapse_1).
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) + layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)

        # Accumulate the updates; weights stay fixed until the whole sequence
        # has been backpropagated. atleast_2d handles the initial 1-D state.
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # Apply the accumulated updates scaled by the learning rate.
    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha

    # Reset the accumulators in place for the next problem.
    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # Report progress every 1000 iterations.
    if j % 1000 == 0:
        print("Error:{}\tPred:{}\tTrue:{}".format(str(overallError),
                                                  str(d),
                                                  str(c)))
        # Decode the predicted bits into a decimal number. Each bit is turned
        # into a Python int first so the sum does not depend on NumPy's uint8
        # scalar promotion rules.
        out = 0
        for index,x in enumerate(reversed(d)):
            out += int(x)*pow(2,index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")


Error:[ 0.3269932]	Pred:[1 0 0 0 1 0 0 0]	True:[1 0 0 0 1 0 0 0]
74 + 62 = 136
------------
Error:[ 0.19395382]	Pred:[1 0 1 0 1 1 1 1]	True:[1 0 1 0 1 1 1 1]
108 + 67 = 175
------------
Error:[ 0.16259731]	Pred:[1 0 0 0 1 1 1 1]	True:[1 0 0 0 1 1 1 1]
78 + 65 = 143
------------
Error:[ 0.15228023]	Pred:[1 0 0 1 0 1 1 0]	True:[1 0 0 1 0 1 1 0]
36 + 114 = 150
------------
Error:[ 0.16289552]	Pred:[0 1 0 1 0 0 1 1]	True:[0 1 0 1 0 0 1 1]
24 + 59 = 83
------------
Error:[ 0.18106679]	Pred:[0 0 1 1 0 1 1 1]	True:[0 0 1 1 0 1 1 1]
8 + 47 = 55
------------
Error:[ 0.12223099]	Pred:[0 0 1 1 0 1 1 0]	True:[0 0 1 1 0 1 1 0]
23 + 31 = 54
------------
Error:[ 0.09986539]	Pred:[0 1 1 0 1 1 0 1]	True:[0 1 1 0 1 1 0 1]
36 + 73 = 109
------------
Error:[ 0.13762796]	Pred:[1 1 1 0 1 0 1 1]	True:[1 1 1 0 1 0 1 1]
114 + 121 = 235
------------
Error:[ 0.10310988]	Pred:[0 0 1 1 1 1 1 1]	True:[0 0 1 1 1 1 1 1]
6 + 57 = 63
------------

In [ ]: