In [118]:
import numpy as np
In [172]:
X = np.array([[0,0],[0,1],[1,0],[1,1]])
# Targets have variable lengths (1 to 4 elements), so keep them as plain Python lists rather than a ragged NumPy array
y = [[0],[0,0],[0,0,0],[0,0,0,0]]
In [183]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def relu(x): #leaky ReLU: small slope for negative inputs
    alpha = 0.01
    return np.maximum(x, alpha * x)
In [184]:
#initialize random weights (scaled in the spirit of Glorot initialization; the +1 rows hold the bias weights)
numIn, numHid, numOut = 2, 3, 2
theta1 = np.array(0.5 * np.sqrt(6 / (numIn + numHid)) * np.random.randn(numIn + 1, numHid), dtype="float32")
theta2 = np.array(0.5 * np.sqrt(6 / (numHid + numOut)) * np.random.randn(numHid + 1, numOut), dtype="float32")
In [123]:
theta = np.append(theta1.flatten(), theta2.flatten()) #unroll both weight matrices into one long vector (9 + 8 = 17 parameters)
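As a quick sanity check (added here, not part of the original notebook), the unrolled vector should hold (numIn+1)*numHid + (numHid+1)*numOut = 17 weights, which is why solLen is set to 17 in the genetic algorithm below.
In [ ]:
assert theta.shape == ((numIn + 1) * numHid + (numHid + 1) * numOut,) # 3*3 + 4*2 = 17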
In [125]:
def nn(x, theta):
    i = 0
    theta1 = np.array(theta[:9]).reshape(3,3) #first 9 params -> (numIn+1, numHid)
    theta2 = np.array(theta[9:]).reshape(4,2) #last 8 params -> (numHid+1, numOut)
    outputs = []
    def comp(x):
        a1 = np.concatenate((x.reshape(1,2), np.ones((1,1))), axis=1) #append bias unit
        z2 = a1 @ theta1
        a2 = np.concatenate((relu(z2), np.ones((1,1))), axis=1) #append bias unit
        z3 = a2 @ theta2
        a3 = sigmoid(z3)
        return a3
    a3 = comp(x)
    outputs.append(a3[0,1]) #second output node carries the data output
    while a3[0,0] > 0.5 and i < 3: #first output node > 0.5 triggers recursion; cap length to prevent an infinite loop
        i += 1
        inp = np.array([[a3[0,1], 0]]) #feed the previous output back in as the next input
        a3 = comp(inp)
        outputs.append(a3[0,1])
    return np.array(outputs)
The neural network accepts an input vector of length 2 and has 2 output nodes. The first node controls whether the network recursively runs itself again; the second carries the actual data output. We simply threshold the first node at 0.5 to decide whether to trigger a recursive call, and cap the output length at 4 to prevent an infinite loop.
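To make the halting mechanism concrete, here is a small check (a sketch added for illustration; output_length is a hypothetical helper, not part of the original notebook): the length of the returned vector is 1 plus the number of times the first node exceeded 0.5.
In [ ]:
def output_length(x, theta): #hypothetical helper for illustration
    return len(nn(x, theta))
for i in range(4):
    print(X[i], '->', output_length(X[i], theta), 'output element(s)')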
In [198]:
###example output with random initial weights
print( nn(X[0], theta) )
print( nn(X[1], theta) )
print( nn(X[2], theta) )
print( nn(X[3], theta) )
In [194]:
def costFunction(X, Y, theta):
    cost = 0
    for i in range(len(X)):
        y = Y[i]
        hThetaX = nn(X[i], theta)
        if len(y) != len(hThetaX):
            cost += 3 #fixed penalty for producing the wrong output length
        else:
            m = float(len(y)) #normalize by the target length
            cost += (1/m) * np.sum(np.abs(y - hThetaX)**2)
    return cost
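As a quick baseline (added here), the untrained random weights should score poorly, since most examples will incur the wrong-length penalty:
In [ ]:
print(costFunction(X, y, theta)) #expect a relatively high cost with random weights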
In [185]:
import random as rn, numpy as np
# [initial population size, mutation rate (1%), num generations (500), solution length (17 weights), num winners per generation]
initPop, mutRate, numGen, solLen, numWin = 100, 0.01, 500, 17, 20
#initialize current population to random values within range
curPop = np.random.choice(np.arange(-15,15,step=0.01),size=(initPop, solLen),replace=False)
nextPop = np.zeros((curPop.shape[0], curPop.shape[1]))
fitVec = np.zeros((initPop, 2)) #1st col is indices, 2nd col is cost
for i in range(numGen): #iterate through generations
    #Create vector of all errors from the cost function for each solution
    fitVec = np.array([np.array([x, costFunction(X, y, curPop[x])]) for x in range(initPop)])
    #plt.pyplot.scatter(i, np.sum(fitVec[:,1]))
    winners = np.zeros((numWin, solLen))
    for n in range(len(winners)): #tournament selection: fill the winners matrix
        selected = np.random.choice(range(len(fitVec)), numWin//2, replace=False) #integer division for Python 3
        wnr = np.argmin(fitVec[selected,1])
        winners[n] = curPop[int(fitVec[selected[wnr]][0])]
    nextPop[:len(winners)] = winners #populate new generation with winners
    duplicWin = np.zeros((initPop - len(winners), winners.shape[1]))
    for x in range(winners.shape[1]): #for each column (weight) in winners
        #Duplicate winners (20x17 matrix) 4 times to create an 80x17 matrix, then shuffle each column
        numDups = (initPop - len(winners)) // len(winners) #num times to duplicate winners to fill the rest of nextPop
        duplicWin[:, x] = np.repeat(winners[:, x], numDups, axis=0) #duplicate each column
        duplicWin[:, x] = np.random.permutation(duplicWin[:, x]) #shuffle each column ("crossover")
    #Populate the rest of the generation with offspring of the mating pairs
    nextPop[len(winners):] = duplicWin
    #Create a mutation matrix: mostly 1s, but some elements are random numbers from a normal distribution
    mutMatrix = [np.random.normal(0, 2) if rn.random() < mutRate else 1 for x in range(nextPop.size)]
    #randomly mutate part of the population by multiplying nextPop elementwise by the mutation matrix
    nextPop = np.multiply(nextPop, np.array(mutMatrix).reshape(nextPop.shape))
    curPop = nextPop
#Re-evaluate the final population so best_soln matches its fitness score (curPop was mutated after fitVec was last computed)
fitVec = np.array([np.array([x, costFunction(X, y, curPop[x])]) for x in range(initPop)])
best_soln = curPop[np.argmin(fitVec[:,1])]
print("Best Sol'n:\n%s\nCost:%s" % (best_soln, costFunction(X, y, best_soln)))
In [193]:
#Demonstrate variable-length output after training
print( np.round(nn(X[0], best_soln), 2) )
print( np.round(nn(X[1], best_soln), 2) )
print( np.round(nn(X[2], best_soln), 2) )
print( np.round(nn(X[3], best_soln), 2) )
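For reference (also added here), the predictions can be read against the targets: a perfect solution would emit len(y[i]) values, all near 0, for input X[i].
In [ ]:
for i in range(4):
    pred = np.round(nn(X[i], best_soln), 2)
    print('input', X[i], '| target', y[i], '| prediction', pred, '| length match:', len(pred) == len(y[i]))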