In [162]:
import copy, numpy as np
In [163]:
np.random.seed(0)  # fix the RNG so the weight initialisation below is reproducible
def sigmoid(x):
    """Logistic activation: squashes any real (or array) x into (0, 1)."""
    return 1 / (1 + np.exp(-x))
def sigmoid_dev(y):
    """Sigmoid derivative written in terms of the sigmoid OUTPUT y: s'(x) = y(1-y)."""
    return (1 - y) * y
def softplus(x):
    """Softplus activation: log(1 + e^x), a smooth ramp (unused in the cells below)."""
    return np.log(np.exp(x) + 1)
def softplus_dev(y):
    """Softplus derivative expressed in terms of the softplus OUTPUT y,
    matching the convention of sigmoid_dev above (which also takes the
    activation output, not the pre-activation input).

    Since y = log(1 + e^x), we get d/dx softplus(x) = sigmoid(x)
    = (e^y - 1) / e^y = 1 - e^(-y).

    Fix: the previous body returned sigmoid(y), i.e. the derivative
    evaluated at the *input* — inconsistent with the output-based
    convention this notebook uses everywhere else.
    """
    return 1.0 - np.exp(-y)
In [164]:
# Big-endian bit-pattern lookup table: binary[i] holds the bin_dim bits of i.
int2bin = {}  # NOTE: left over from an earlier revision, never used below
bin_dim = 8
N = 2 ** bin_dim  # number of representable values (256)
binary = np.unpackbits(np.array([range(N)], dtype=np.uint8).T, axis=1)
print(binary)
In [165]:
# Training hyper-parameters and parameter initialisation.
alpha = 0.1            # learning rate
nn_dim = [3, 8, 2]     # layer widths: 3 inputs -> 8 hidden -> 2 outputs
input_dim = nn_dim[0]
output_dim = nn_dim[-1]
num_layers = len(nn_dim)

# Weight matrices drawn uniformly from [-1, 1), one per consecutive layer pair.
s = [2 * np.random.random((fan_in, fan_out)) - 1
     for fan_in, fan_out in zip(nn_dim, nn_dim[1:])]

# Gradient accumulators with the same shapes as the weights.
u = [np.zeros_like(mat) for mat in s]
In [166]:
def run_nn(s, X):
    """Forward pass through the net.

    Returns the list of per-layer activations, starting with a copy of
    the input X and ending with the output layer.
    """
    activations = [copy.deepcopy(X)]
    for weights in s:
        nxt = sigmoid(np.dot(activations[-1], weights))
        activations.append(copy.deepcopy(nxt))
    return activations
def bp_nn(s, X, Y):
    """One forward/backward pass of the feed-forward net.

    Parameters
    ----------
    s : list of weight matrices, one per consecutive layer pair.
    X : input vector of length nn_dim[0].
    Y : target vector of length nn_dim[-1].

    Returns
    -------
    (u, layer_v) : per-matrix gradients (same shapes as s) and the list of
    layer activations from the forward pass.
    """
    layer_v = run_nn(s, X)
    # Fresh gradient accumulators, one per weight matrix (shadows the global u).
    u = list()
    for i in range(num_layers - 1):
        u.append(np.zeros((nn_dim[i], nn_dim[i+1])))
    dets = list()
    # Output-layer delta: (target - output) * sigmoid'(output).
    det = (Y - layer_v[-1]) * sigmoid_dev(layer_v[-1])
    dets.append(copy.deepcopy(det))
    # Propagate deltas backwards through each weight matrix; the final
    # iteration also produces an input-layer delta (unused for weights here,
    # but train() reads it as the recurrent carry gradient).
    for i in range(1, len(s) + 1):
        mat = s[-i]
        det = dets[-1].dot(mat.T) * sigmoid_dev(layer_v[-1-i])
        dets.append(copy.deepcopy(det))
    # Gradient for matrix i = outer product of layer-i activations with the
    # delta of layer i+1; dets is ordered output-first, so that delta sits
    # at dets[-2-i].
    for i in range(0, len(s)):
        u[i] += np.tensordot(layer_v[i], dets[-2-i], axes=0)
    return u, layer_v
In [171]:
def train():
    """Train the net as a bit-serial binary adder (BPTT-style).

    The third input and the second output act as the carry threaded
    between bit positions.  Mutates the global weights s and gradient
    accumulators u.
    """
    for j in range(20000):
        # Sample a, b < N/2 so the sum still fits in bin_dim bits.
        a_int = np.random.randint(N // 2)
        a = binary[a_int]
        b_int = np.random.randint(N // 2)
        b = binary[b_int]
        c_int = a_int + b_int
        c = binary[c_int]
        d = np.zeros_like(c)       # predicted bits
        overallErr = 0
        layer_vals = list()        # activations saved per bit position
        h_val = 0                  # carry fed into the next step
        # Forward pass, least-significant bit first.
        for p in range(bin_dim):
            X = np.array([a[-1 - p], b[-1 - p], h_val])
            Y = np.array([c[-1 - p]])
            layer_v = run_nn(s, X)
            layer_vals.append(layer_v)
            h_val = layer_v[-1][-1]          # second output = predicted carry
            d[-1-p] = np.round(layer_v[-1][0])
            overallErr += np.abs(c[-1 - p] - layer_v[-1][0])
        #print(layer_vals)
        # Backward pass, most-significant bit first.
        future_h_d = 0
        for p in range(bin_dim):
            layer_v = layer_vals[-1 - p]
            X = layer_v[0]         # input of this step (kept for reference, unused)
            #print(layer_v[-1])
            dets = list()
            # layer_vals[-1-p] was the forward step for bit index p
            # (since len(c) == bin_dim), so c[p] is the matching target bit.
            d_d = (c[p] - layer_v[-1][0]) * sigmoid_dev(layer_v[-1][0])
            if p == 0:
                # Final step: push the leftover carry towards 0
                # (a + b < N, so no overflow carry should remain).
                h_d = (0 - layer_v[-1][-1]) * sigmoid_dev(layer_v[-1][-1])
            else:
                h_d = future_h_d   # carry gradient flowing back from the later bit
            dets.append(np.array([d_d, h_d]))
            # Backpropagate through the layers (same scheme as bp_nn).
            for i in range(1, len(s) + 1):
                mat = s[-i]
                det = dets[-1].dot(mat.T) * sigmoid_dev(layer_v[-1-i])
                dets.append(copy.deepcopy(det))
            # Input-layer delta's carry component feeds the previous bit.
            future_h_d = dets[-1][-1]
            # Accumulate weight gradients for this bit position.
            for i in range(0, len(s)):
                u[i] += np.tensordot(layer_v[i], dets[-2-i], axes=0)
        # Apply the accumulated update (ascent on Y - out == error descent),
        # then reset the accumulators in place.
        for i in range(0, len(s)):
            s[i] += u[i] * alpha
            u[i] *= 0
        if j % 5000 == 0:
            print("Error:", overallErr)
            print("Pred:", d)
            print("True:", c)
            #print(s)
train()
In [172]:
def fastTrain():
    """Batch-train the net on all 8 one-bit full-adder cases.

    Inputs are (a, b, carry_in); targets are (sum bit, carry bit).
    Mutates the global weights s and gradient accumulators u.
    """
    # Clear any stale gradients in place.
    for i in range(num_layers - 1):
        u[i] *= 0
    for j in range(20000):
        err = 0.0
        for x in range(8):
            # Decode the three input bits from x.
            a = x % 2
            b = (x // 2) % 2
            c = (x // 4) % 2
            d = a + b + c
            d1 = d % 2          # sum bit
            d2 = (d // 2) % 2   # carry bit
            X = np.array([a, b, c])
            Y = np.array([d1, d2])
            ux, layer_v = bp_nn(s, X, Y)
            err += np.sum(np.abs(Y - layer_v[-1]))
            # Accumulate this sample's gradients into the batch total.
            for i in range(num_layers - 1):
                u[i] += ux[i]
        # Apply the full-batch update, then reset the accumulators.
        for i in range(0, len(s)):
            s[i] += u[i] * alpha
            u[i] *= 0
        if j % 5000 == 0:
            print("Error:", err)
#fastTrain()
# Show the net's output on every one-bit addition case:
# inputs (a, b, carry_in) -> targets (sum bit, carry bit).
for x in range(8):
    x1 = x & 1
    x2 = (x >> 1) & 1
    x3 = (x >> 2) & 1
    y = x1 + x2 + x3
    X = np.array([x1, x2, x3])
    Y = np.array([y & 1, (y >> 1) & 1])
    ux, layer_v = bp_nn(s, X, Y)
    print(X, Y, np.round(layer_v[-1]))
In [169]:
def pred(a_int, b_int):
    """Add a_int and b_int with the trained network, one bit per step
    (least-significant first), threading the predicted carry through.

    Returns the predicted sum as a Python int.
    """
    a_bits = binary[a_int]
    b_bits = binary[b_int]
    out_bits = np.zeros_like(b_bits)
    carry = 0
    for p in range(bin_dim):
        X = np.array([a_bits[-1 - p], b_bits[-1 - p], carry])
        acts = run_nn(s, X)
        out_bits[-1 - p] = np.round(acts[-1][0])
        carry = acts[-1][-1]
    # Fold the big-endian bit vector back into an integer.
    value = 0
    for bit in out_bits:
        value = value * 2 + int(bit)
    return value
# Exhaustively check the adder on every pair whose sum fits in bin_dim bits,
# counting and printing the mismatches.
# Fix: the original called pred(i, j) a second time inside print() for each
# mismatch, redundantly re-running the whole bit-serial forward pass.
count = 0
for i in range(N // 2):
    for j in range(N // 2):
        guess = pred(i, j)   # evaluate once and reuse
        if guess != i + j:
            count += 1
            print(i, j, guess)
print("DONE", count)