In [162]:
import copy, numpy as np

In [163]:
# Seed NumPy's global RNG so the weight initialization and training runs are reproducible.
np.random.seed(0)

def sigmoid(x):
    """Logistic activation: maps x elementwise into (0, 1)."""
    ex = np.exp(-x)
    return 1 / (1 + ex)

def sigmoid_dev(y):
    """Sigmoid derivative expressed in terms of the activation *output* y."""
    one_minus = 1 - y
    return y * one_minus

def softplus(x):
    """Softplus activation log(1 + exp(x)).

    Implemented as logaddexp(0, x), which computes the same value but
    avoids the overflow that np.exp(x) hits for large x (x > ~709 made
    the original return inf with a RuntimeWarning).
    """
    return np.logaddexp(0.0, x)

def softplus_dev(y):
    """Softplus derivative, i.e. the logistic function 1/(1 + exp(-y)).

    NOTE(review): unlike sigmoid_dev, which takes the activation *output*,
    this is the softplus derivative only when y is the pre-activation
    *input* (d/dx softplus(x) = sigmoid(x)) — confirm intended convention.
    Currently unused in this notebook.
    """
    ey = np.exp(-y)
    return 1 / (1 + ey)

In [164]:
int2bin = {}  # NOTE(review): appears unused in the rest of the notebook
bin_dim = 8                # number of bits per operand
N = 2 ** bin_dim           # number of representable values (256)

# Lookup table: row i holds the big-endian 8-bit binary representation of i.
binary = np.unpackbits(np.arange(N, dtype=np.uint8).reshape(-1, 1), axis=1)
print(binary)


[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 1]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 1 ..., 1 0 1]
 [1 1 1 ..., 1 1 0]
 [1 1 1 ..., 1 1 1]]

In [165]:
alpha = 0.1                 # learning rate
nn_dim = [3, 8, 2]          # layer sizes: input (a-bit, b-bit, carry), hidden, output (sum-bit, carry)
input_dim = nn_dim[0]
output_dim = nn_dim[-1]

num_layers = len(nn_dim)

# Weight matrices, uniform in [-1, 1), one per consecutive layer pair.
s = [2 * np.random.random((rows, cols)) - 1
     for rows, cols in zip(nn_dim[:-1], nn_dim[1:])]

# Gradient accumulators, shape-matched to the weights and zero-initialized.
u = [np.zeros_like(mat) for mat in s]

In [166]:
def run_nn(s, X):
    """Forward pass through the fully-connected sigmoid network.

    s: list of weight matrices, one per consecutive layer pair.
    X: input activation vector.
    Returns [input, hidden activations..., output], each an independent copy.
    """
    activations = [copy.deepcopy(X)]
    for weights in s:
        next_layer = sigmoid(np.dot(activations[-1], weights))
        activations.append(copy.deepcopy(next_layer))
    return activations

def bp_nn(s, X, Y):
    """One backpropagation step for input X and target Y.

    s: list of weight matrices; X: input vector; Y: target output vector.
    Returns (u, layer_v) where u is a list of weight gradients (outer
    products input ⊗ delta, shape-matched to s) and layer_v is the list
    of layer activations from run_nn.

    Fix: the original sized the gradient accumulators from the module-level
    globals nn_dim/num_layers even though the weights arrive as the
    parameter s — it would break for any differently-shaped network.
    The shapes are now derived from s itself; behavior is unchanged for
    the network defined in this notebook.
    """
    layer_v = run_nn(s, X)

    # Gradient accumulators, one per weight matrix (shape-matched to s).
    u = [np.zeros_like(mat) for mat in s]

    # Output-layer delta: error times sigmoid derivative at the output.
    dets = [copy.deepcopy((Y - layer_v[-1]) * sigmoid_dev(layer_v[-1]))]

    # Propagate deltas backwards through each weight matrix.
    for i in range(1, len(s) + 1):
        det = dets[-1].dot(s[-i].T) * sigmoid_dev(layer_v[-1 - i])
        dets.append(copy.deepcopy(det))

    # Outer products layer-input ⊗ delta give the weight gradients.
    for i in range(len(s)):
        u[i] += np.tensordot(layer_v[i], dets[-2 - i], axes=0)

    return u, layer_v

In [171]:
def train():
    """Train the recurrent adder on random 8-bit addition problems.

    Mutates the module-level weights ``s`` via the accumulators ``u``.
    Each iteration samples a + b (both < N//2 so the sum fits in
    bin_dim bits), runs the net one bit at a time from least significant
    to most significant while threading the network's carry output back
    into its input, then backpropagates through time and applies the
    accumulated updates.
    """
    for j in range(20000):

        # Sample two addends small enough that a + b still fits in bin_dim bits.
        a_int = np.random.randint(N // 2)
        a = binary[a_int]

        b_int = np.random.randint(N // 2)
        b = binary[b_int]

        c_int = a_int + b_int
        c = binary[c_int]  # target bit pattern of the sum

        d = np.zeros_like(c)  # predicted bits

        overallErr = 0

        layer_vals = list()  # per-timestep activations, kept for backprop

        h_val = 0  # recurrent "carry" signal fed back into the next input

        # Forward pass, least-significant bit first (index -1 and backwards).
        for p in range(bin_dim):
            X = np.array([a[-1 - p], b[-1 - p], h_val])
            # NOTE(review): Y is assigned but unused; the backward pass reads c directly.
            Y = np.array([c[-1 - p]])

            layer_v = run_nn(s, X)
            layer_vals.append(layer_v)

            # Output unit 0 is the sum bit; unit -1 is the carry passed forward.
            h_val = layer_v[-1][-1]

            d[-1-p] = np.round(layer_v[-1][0])
            overallErr += np.abs(c[-1 - p] - layer_v[-1][0])
        #print(layer_vals)
        # Backward pass through time, most-significant timestep first.
        future_h_d = 0
        for p in range(bin_dim):
            # layer_vals[-1 - p] is the forward step for bit position p,
            # so its sum-bit target is c[p].
            layer_v = layer_vals[-1 - p]
            X = layer_v[0]
            #print(layer_v[-1])
            dets = list()

            # Delta for the sum-bit output unit.
            d_d = (c[p] - layer_v[-1][0]) * sigmoid_dev(layer_v[-1][0])

            # Carry delta: at the final (most significant) step the carry
            # target is 0; otherwise use the delta propagated back from
            # the future timestep's input.
            if p == 0:
                h_d = (0 - layer_v[-1][-1]) * sigmoid_dev(layer_v[-1][-1])
            else:
                h_d = future_h_d

            dets.append(np.array([d_d, h_d]))

            # Backpropagate the output delta through each weight matrix.
            for i in range(1, len(s) + 1):
                mat = s[-i]
                det = dets[-1].dot(mat.T) * sigmoid_dev(layer_v[-1-i])
                dets.append(copy.deepcopy(det))

            # Input-layer delta at the carry slot feeds the next (earlier) step.
            future_h_d = dets[-1][-1]

            # Accumulate outer-product gradients for every weight matrix.
            for i in range(0, len(s)):
                u[i] += np.tensordot(layer_v[i], dets[-2-i], axes=0)
        # Apply the accumulated updates, then reset the accumulators.
        for i in range(0, len(s)):
            s[i] += u[i] * alpha
            u[i] *= 0
        if j % 5000 == 0:
            print("Error:", overallErr)
            print("Pred:", d)
            print("True:", c)
            #print(s)
train()


Error: 0.254437759353
Pred: [0 1 1 1 1 1 1 1]
True: [0 1 1 1 1 1 1 1]
Error: 0.190465048641
Pred: [1 0 0 1 0 1 0 0]
True: [1 0 0 1 0 1 0 0]
Error: 0.169893630973
Pred: [0 1 1 1 0 0 0 1]
True: [0 1 1 1 0 0 0 1]
Error: 0.164652831298
Pred: [0 1 1 1 1 1 0 1]
True: [0 1 1 1 1 1 0 1]

In [172]:
def fastTrain():
    """Train the network as a 3-bit full adder on all eight input patterns.

    Mutates the module-level weights ``s`` via the accumulators ``u``;
    every epoch does one batched update over the full truth table.
    """
    for i in range(num_layers - 1):
        u[i] *= 0
    for j in range(20000):
        err = 0.0
        # Enumerate every (bit_a, bit_b, carry_in) combination.
        for x in range(8):
            a = x & 1
            b = (x >> 1) & 1
            c = (x >> 2) & 1
            total = a + b + c
            X = np.array([a, b, c])
            Y = np.array([total & 1, (total >> 1) & 1])  # sum bit, carry-out
            ux, layer_v = bp_nn(s, X, Y)
            err += np.sum(np.abs(Y - layer_v[-1]))
            for i in range(num_layers - 1):
                u[i] += ux[i]
        # Batched update, then reset the accumulators.
        for i in range(0, len(s)):
            s[i] += u[i] * alpha
            u[i] *= 0
        if j % 5000 == 0:
            print("Error:", err)
#fastTrain()

# Sanity check: evaluate the trained full adder on all eight input patterns.
for x in range(8):
    x1 = x & 1
    x2 = (x >> 1) & 1
    x3 = (x >> 2) & 1
    total = x1 + x2 + x3
    X = np.array([x1, x2, x3])
    Y = np.array([total & 1, (total >> 1) & 1])  # expected sum bit, carry-out
    ux, layer_v = bp_nn(s, X, Y)
    print(X, Y, np.round(layer_v[-1]))


[0 0 0] [0 0] [ 0.  0.]
[1 0 0] [1 0] [ 1.  0.]
[0 1 0] [1 0] [ 1.  0.]
[1 1 0] [0 1] [ 0.  1.]
[0 0 1] [1 0] [ 1.  0.]
[1 0 1] [0 1] [ 0.  1.]
[0 1 1] [0 1] [ 0.  1.]
[1 1 1] [1 1] [ 1.  1.]

In [169]:
def pred(a_int, b_int):
    """Predict a_int + b_int with the trained recurrent adder.

    Feeds the operand bits least-significant first, threading the
    network's carry output back into its input, then reassembles the
    predicted big-endian bit vector into an integer.
    """
    a = binary[a_int]
    b = binary[b_int]
    d = np.zeros_like(b)
    h = 0  # carry signal threaded between timesteps
    for p in range(bin_dim):
        X = np.array([a[-1 - p], b[-1 - p], h])
        layer_v = run_nn(s, X)
        d[-1 - p] = np.round(layer_v[-1][0])
        h = layer_v[-1][-1]

    # Fold the big-endian bits back into an integer.
    d_int = 0
    for bit in d:
        d_int = d_int * 2 + (1 if bit == 1 else 0)
    return d_int

# Exhaustively verify every sum that fits in bin_dim bits; print mismatches.
count = 0
for i in range(N // 2):
    for j in range(N // 2):
        got = pred(i, j)
        if got != i + j:
            count += 1
            print(i, j, got)

print("DONE", count)


DONE 0