A recursive neural network that decides how many times to run itself

Produces variable-length outputs for static-length inputs.


In [118]:
import numpy as np

In [172]:
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[0,0],[0,0,0],[0,0,0,0]])

In [183]:
def sigmoid(x):
    return np.matrix(1.0 / (1.0 + np.exp(-x)))

def relu(x):
    alpha = 0.01
    return np.maximum(x, (alpha * x))

In [184]:
#initialize random weights
numIn, numHid, numOut = 2, 3, 2
theta1 = np.array( 0.5 * np.sqrt ( 6 / ( numIn + numHid) ) * np.random.randn( numIn + 1, numHid ), dtype="float32" )
theta2 = np.array( 0.5 * np.sqrt ( 6 / ( numHid + numOut ) ) * np.random.randn( numHid + 1, numOut ), dtype="float32" )

In [123]:
theta = np.append(theta1.flatten(), theta2.flatten()) #unroll vectors in a one long vector

In [125]:
def nn(x, theta):
    i = 0
    theta1 = np.array(theta[:9]).reshape(3,3)
    theta2 = np.array(theta[9:]).reshape(4,2)
    #print(theta1.shape)
    #print(theta2.shape)
    outputs = []
    def comp(x):
        #print(x)
        a1 = np.array(np.concatenate((x.reshape(1,2), np.ones((1,1))), axis=1))
        z2 = a1 @ theta1
        a2 = np.concatenate((relu(z2), np.ones((1,1))), axis=1)
        z3 = a2 @ theta2
        a3 = sigmoid(z3)
        return a3
    
    a3 = comp(x)
    outputs.append(a3[0,1])
    while a3[0,0] > 0.5 and i < 3: #prevent an infinite loop; constrain output length
        i += 1
        input = np.array([[a3[0,1],0]])
        a3 = comp(input)
        outputs.append(a3[0,1])
    return np.array(outputs)

The neural network accepts an input vector of length 2. It has 2 output nodes. One node is used to control whether or not to recursively run itself, the other is the real data output. We simply threshold > 0.5 to trigger a recursive call to itself.


In [198]:
###example output with random initial weights
print( nn(X[0], theta) )
print( nn(X[1], theta) ) 
print( nn(X[2], theta) ) 
print( nn(X[3], theta) )


[ 0.62431196]
[ 0.71118059]
[ 0.60979257]
[ 0.69732337]

Cost Function

Arbitrarily assign a high cost to mismatches in the length of the output, then also assess MSE


In [194]:
def costFunction(X, Y, theta):
    cost = 0
    for i in range(len(X)):
        y = Y[i]
        m = float(len(X[i]))
        hThetaX = nn(X[i], theta)
        if len(y) != len(hThetaX):
            cost += 3
        else:
            cost += (1/m) * np.sum(np.abs(y - hThetaX)**2)
        
    return cost

Genetic Algorithm to Solve Weights:


In [185]:
import random as rn, numpy as np
# [Initial population size, mutation rate (=1%), num generations (30), solution length (13), # winners/per gen]
initPop, mutRate, numGen, solLen, numWin = 100, 0.01, 500, 17, 20
#initialize current population to random values within range
curPop = np.random.choice(np.arange(-15,15,step=0.01),size=(initPop, solLen),replace=False)
nextPop = np.zeros((curPop.shape[0], curPop.shape[1]))
fitVec = np.zeros((initPop, 2)) #1st col is indices, 2nd col is cost
for i in range(numGen): #iterate through num generations
    #Create vector of all errors from cost function for each solution
	fitVec = np.array([np.array([x, np.sum(costFunction(X, y, curPop[x].T))]) for x in range(initPop)])
	#plt.pyplot.scatter(i,np.sum(fitVec[:,1]))
	winners = np.zeros((numWin, solLen))
	for n in range(len(winners)): #for n in range(10)
		selected = np.random.choice(range(len(fitVec)), numWin/2, replace=False)
		wnr = np.argmin(fitVec[selected,1])
		winners[n] = curPop[int(fitVec[selected[wnr]][0])]
	nextPop[:len(winners)] = winners #populate new gen with winners
	duplicWin = np.zeros((((initPop - len(winners))),winners.shape[1]))
	for x in range(winners.shape[1]): #for each col in winners (3 cols)
        #Duplicate winners (20x3 matrix) 3 times to create 80x3 matrix, then shuffle columns
		numDups = ((initPop - len(winners))/len(winners)) #num times to duplicate to fill rest of nextPop
		duplicWin[:, x] = np.repeat(winners[:, x], numDups, axis=0)#duplicate each col
		duplicWin[:, x] = np.random.permutation(duplicWin[:, x]) #shuffle each col ("crossover")
    #Populate the rest of the generation with offspring of mating pairs
	nextPop[len(winners):] = np.matrix(duplicWin)
    #Create a mutation matrix, mostly 1s, but some elements are random numbers from a normal distribution
	mutMatrix = [np.float(np.random.normal(0,2,1)) if rn.random() < mutRate else 1 for x in range(nextPop.size)]
    #randomly mutate part of the population by multiplying nextPop by our mutation matrix
	nextPop = np.multiply(nextPop, np.matrix(mutMatrix).reshape(nextPop.shape))
	curPop = nextPop
best_soln = curPop[np.argmin(fitVec[:,1])]
print("Best Sol'n:\n%s\nCost:%s" % (best_soln,np.sum(costFunction(X, y, best_soln.T))))


/Users/brandonbrown/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:16: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/brandonbrown/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: RuntimeWarning: overflow encountered in exp
  from ipykernel import kernelapp as app
Best Sol'n:
[[ 13.55        14.58        62.0889891   -6.4966617    7.16086276
   -1.80259996   8.53635902   8.6205749  -10.54        -5.27       -13.5479922
   -0.16212416  -9.13629965   8.46666441  10.39147747  27.13608853
  -23.26767004]]
Cost:0.835819518334

In [193]:
#Demonstrate variable output after training
print( np.round(nn(X[0], best_soln.reshape(17,1)), 2) )
print( np.round(nn(X[1], best_soln.reshape(17,1)), 2) )
print( np.round(nn(X[2], best_soln.reshape(17,1)), 2) )
print( np.round(nn(X[3], best_soln.reshape(17,1)), 2) )


[ 0.]
[ 0.  0.]
[ 0.77  0.    0.  ]
[ 0.99  0.3   0.    0.  ]