What do we mean by learning?
So, we'll stick with a simple definition of learning
This is the "Prescribed Error Sensitivity" (PES) rule
Is this realistic?
In [120]:
#From the learning examples in nengo - a Communication Channel
%pylab inline
import nengo
from nengo.processes import WhiteSignal
# Build the baseline network: a 2-D white-noise stimulus drives `pre`, and
# `pre` projects to `post` through a connection whose function ignores its
# input and returns two fresh random numbers on every call (presumably so
# that `post` does not track `pre` before learning is added).
model = nengo.Network('Learn a Communication Channel')
with model:
stim = nengo.Node(output=WhiteSignal(10, high=5, rms=0.5), size_out=2)
pre = nengo.Ensemble(60, dimensions=2)
post = nengo.Ensemble(60, dimensions=2)
nengo.Connection(stim, pre)
# `np` is not imported in this cell; it is supplied by `%pylab inline`.
conn = nengo.Connection(pre, post, function=lambda x: np.random.random(2))
# Probe the raw stimulus and the synapse-filtered (0.01) ensemble outputs.
inp_p = nengo.Probe(stim)
pre_p = nengo.Probe(pre, synapse=0.01)
post_p = nengo.Probe(post, synapse=0.01)
# Build the simulator and run it for 10.0 s of simulated time.
sim = nengo.Simulator(model)
sim.run(10.0)
In [121]:
# Plot the input, `pre` and `post` signals, one panel per represented dimension.
t = sim.trange()
figure(figsize=(12, 8))
signals = ((inp_p, 'k', 'Input'), (pre_p, 'b', 'Pre'), (post_p, 'r', 'Post'))
for row, dim_label in enumerate(("Dimension 1", "Dimension 2")):
    subplot(2, 1, row + 1)
    for probe, colour, name in signals:
        plot(t, sim.data[probe].T[row], c=colour, label=name)
    ylabel(dim_label)
    legend(loc='best')
    if row == 0:
        # Only the top panel carries the figure title.
        title('Random function computation')
In [122]:
# Show `model` in the nengo_gui IPython viewer; 'pre_learn.py.cfg' is passed
# as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'pre_learn.py.cfg')
In [123]:
#Now learn
# Extend the existing model with an error ensemble, attach a PES learning rule
# to `conn`, feed the error into that rule, then rebuild and run the simulator.
with model:
error = nengo.Ensemble(60, dimensions=2)
error_p = nengo.Probe(error, synapse=0.03)
# Error = actual - target = post - pre
nengo.Connection(post, error)
# This connection is not bound to a name, so it cannot later be removed from
# the model by reference.
nengo.Connection(pre, error, transform=-1)
# Add the learning rule to the connection
conn.learning_rule_type = nengo.PES()
# Connect the error into the learning rule
learn_conn = nengo.Connection(error, conn.learning_rule)
# Rebuild the simulator from the modified model and run for 10.0 s.
sim = nengo.Simulator(model)
sim.run(10.0)
In [124]:
# Panels 1-2: input, `pre` and `post` for each dimension; panel 3: error.
t = sim.trange()
figure(figsize=(12, 8))
signals = ((inp_p, 'k', 'Input'), (pre_p, 'b', 'Pre'), (post_p, 'r', 'Post'))
for row, dim_label in enumerate(("Dimension 1", "Dimension 2")):
    subplot(3, 1, row + 1)
    for probe, colour, name in signals:
        plot(t, sim.data[probe].T[row], c=colour, label=name)
    ylabel(dim_label)
    legend(loc='best')
    if row == 0:
        title('Learn a communication channel')
# Bottom panel: both components of the probed error signal.
subplot(3, 1, 3)
plot(t, sim.data[error_p], c='b')
ylim(-1, 1)
legend(("Error[0]", "Error[1]"), loc='best')
title('Error')
Out[124]:
In [125]:
# Show `model` in the nengo_gui IPython viewer; 'simple_learn.py.cfg' is passed
# as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'simple_learn.py.cfg')
In [109]:
#Turning learning on and off to test generalization
def inhibit(t):
    """Return the inhibition drive for simulation time ``t``.

    The output is 0.0 for ``t`` up to and including 10.0, and 2.0 afterwards.
    """
    if t > 10.0:
        return 2.0
    return 0.0
# Add a node that outputs `inhibit(t)` and connect it to every neuron of
# `error` with weight -1 (presumably silencing the error ensemble, and hence
# learning, once the node switches on). Then rebuild and run for 16.0 s.
with model:
inhib = nengo.Node(inhibit)
# One row of [-1] per error neuron: the scalar node output reaches all neurons.
inhib_conn = nengo.Connection(inhib, error.neurons, transform=[[-1]] * error.n_neurons)
sim = nengo.Simulator(model)
sim.run(16.0)
In [110]:
# Panels 1-2: input, `pre` and `post` for each dimension; panel 3: error.
t = sim.trange()
figure(figsize=(12, 8))
signals = ((inp_p, 'k', 'Input'), (pre_p, 'b', 'Pre'), (post_p, 'r', 'Post'))
for row, dim_label in enumerate(("Dimension 1", "Dimension 2")):
    subplot(3, 1, row + 1)
    for probe, colour, name in signals:
        plot(t, sim.data[probe].T[row], c=colour, label=name)
    ylabel(dim_label)
    legend(loc='best')
    if row == 0:
        title('Learn a communication channel')
# Bottom panel: both components of the probed error signal.
subplot(3, 1, 3)
plot(t, sim.data[error_p], c='b')
ylim(-1, 1)
legend(("Error[0]", "Error[1]"), loc='best')
title('Error')
Out[110]:
In [111]:
# Show `model` in the nengo_gui IPython viewer; 'control_learn.py.cfg' is passed
# as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'control_learn.py.cfg')
In [112]:
#Compute a nonlinear function
# To re-run this cell with a different function, first remove the previous
# `err_fcn` connection by uncommenting the next two lines.
#model.connections.remove(err_fcn) #uncomment to try other fcns
#del err_fcn
# Strip the inhibition node/connection and the old learning-rule connection
# out of the model. Because of the `del` statements, running this cell a
# second time fails with NameError on `inhib_conn`.
model.connections.remove(inhib_conn)
del inhib_conn
model.nodes.remove(inhib)
model.connections.remove(learn_conn)
del learn_conn
def nonlinear(x):
    """Return the squares of the first two components of ``x`` as a 2-tuple."""
    first, second = x[0], x[1]
    return first * first, second * second
# Feed -nonlinear(pre) into `error`, reinstall PES with an explicit learning
# rate, reconnect the error to the learning rule, then rebuild and run 26.0 s.
with model:
# NOTE(review): the unnamed linear pre->error connection (transform=-1) added
# when `error` was first wired up is never removed from the model, so the
# error here appears to be post - pre - nonlinear(pre) rather than
# post - nonlinear(pre) — confirm this is intended.
err_fcn = nengo.Connection(pre, error, function=nonlinear, transform=-1)
conn.learning_rule_type = nengo.PES(learning_rate=1e-4)
# Connect the error into the learning rule
learn_conn = nengo.Connection(error, conn.learning_rule)
sim = nengo.Simulator(model)
sim.run(26.0)
In [113]:
# Panels 1-2: input, `pre` and `post` for each dimension; panel 3: error.
t = sim.trange()
figure(figsize=(12, 8))
signals = ((inp_p, 'k', 'Input'), (pre_p, 'b', 'Pre'), (post_p, 'r', 'Post'))
for row, dim_label in enumerate(("Dimension 1", "Dimension 2")):
    subplot(3, 1, row + 1)
    for probe, colour, name in signals:
        plot(t, sim.data[probe].T[row], c=colour, label=name)
    ylabel(dim_label)
    legend(loc='best')
    if row == 0:
        title('Learn a nonlinear function')
# Bottom panel: both components of the probed error signal.
subplot(3, 1, 3)
plot(t, sim.data[error_p], c='b')
ylim(-1, 1)
legend(("Error[0]", "Error[1]"), loc='best')
title('Error')
Out[113]:
In [116]:
# Show `model` in the nengo_gui IPython viewer; 'square_learn.py.cfg' is passed
# as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'square_learn.py.cfg')
Classical or Pavlovian conditioning uses an unconditioned stimulus (US) (meat for a dog) that elicits an unconditioned response (UR) (salivating) to cause a conditioned response (CR) (salivating after learning) to be elicited by a conditioned stimulus (CS) (ringing a bell).
The best known model of this is the Rescorla-Wagner model, which states: $\Delta V_x = \alpha (\lambda - \sum_x V)$ where $V_x$ is the value of x, $\alpha$ is a learning rate and salience parameter, and $\lambda$ is the max value (usually 1). In the model below there is only 1 element in $\sum V$. The difference in brackets is like a reward prediction error.
In this model:
In [12]:
import nengo
import numpy as np
# D: dimensionality of the us/ur/cr/error ensembles built below (cs uses 2*D).
# N: neuron count for those ensembles, 50 per dimension.
D = 3
N = D*50
def us_stim(t):
    """Return the 3-element unconditioned-stimulus vector at time ``t``.

    Time is wrapped into a cycle of length 3. Strictly inside the last 0.1 of
    each unit interval one component is 1; at all other times the output is
    all zeros.
    """
    phase = t % 3
    # (window start, window end, output) -- bounds are exclusive.
    windows = (
        (0.9, 1, [1, 0, 0]),
        (1.9, 2, [0, 1, 0]),
        (2.9, 3, [0, 0, 1]),
    )
    for start, stop, pulse in windows:
        if start < phase < stop:
            return pulse
    return [0, 0, 0]
def cs_stim(t):
    """Return the 3-element conditioned-stimulus vector at time ``t``.

    Time is wrapped into a cycle of length 3. Strictly inside the last 0.3 of
    each unit interval a fixed pattern is returned; otherwise all zeros.
    """
    phase = t % 3
    # (window start, window end, output) -- bounds are exclusive.
    windows = (
        (0.7, 1, [0.7, 0, 0.5]),
        (1.7, 2, [0.6, 0.7, 0.8]),
        (2.7, 3, [0, 1, 0]),
    )
    for start, stop, pattern in windows:
        if start < phase < stop:
            return pattern
    return [0, 0, 0]
# Classical-conditioning network: US drives UR directly, while a PES-learned
# CS -> CR connection (initially computing all zeros) is trained on the
# error cr - ur.
model = nengo.Network(label="Classical Conditioning")
with model:
# The names `us_stim` / `cs_stim` are rebound from the stimulus functions to
# the Nodes wrapping them; the functions must be redefined before re-running.
us_stim = nengo.Node(us_stim)
cs_stim = nengo.Node(cs_stim)
us = nengo.Ensemble(N, D)
# `cs` has 2*D dimensions: the first D receive the stimulus, the last D
# receive a copy of the first D through a 0.2 synapse.
cs = nengo.Ensemble(N*2, D*2)
nengo.Connection(us_stim, us[:D])
nengo.Connection(cs_stim, cs[:D])
nengo.Connection(cs[:D], cs[D:], synapse=0.2)
ur = nengo.Ensemble(N, D)
nengo.Connection(us, ur)
cr = nengo.Ensemble(N, D)
learn_conn = nengo.Connection(cs, cr, function=lambda x: [0]*D)
learn_conn.learning_rule_type = nengo.PES(learning_rate=3e-4)
error = nengo.Ensemble(N, D)
nengo.Connection(error, learn_conn.learning_rule)
# error = cr - ur (cr arrives through a 0.1 synapse).
nengo.Connection(ur, error, transform=-1)
nengo.Connection(cr, error, transform=1, synapse=0.1)
# Constant-0 node wired to every error neuron with weight -10; presumably its
# value is raised interactively to suppress the error ensemble and stop learning.
stop_learn = nengo.Node([0])
nengo.Connection(stop_learn, error.neurons, transform=-10*np.ones((N, 1)))
In [13]:
# Show `model` in the nengo_gui IPython viewer; 'learning2-conditioning.py.cfg'
# is passed as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'learning2-conditioning.py.cfg')
There is evidence that when you first learn a skill, it takes a lot of effort and you tend to perform fairly slowly. We would think of this as requiring a lot of intervention from the basal ganglia in selecting actions. As you get better at the skill you become much faster, and BG is used less because cortex 'takes over' central aspects of that skill, consolidating it into cortico-cortical connections. This model shows a toy version of this kind of behaviour.
In this model:
In [7]:
import nengo
import numpy as np
# Synaptic time constant used on the slow pre -> wm -> target pathway.
tau_slow = 0.2
# Consolidation network: a slow pathway (pre -> wm -> target) computes the
# target value, and a direct PES-learned pre -> post connection is trained
# on the error post - target.
model = nengo.Network("Cortical Consolidation")
with model:
pre_value = nengo.Node(lambda t: np.sin(t))
pre = nengo.Ensemble(100, 1)
post = nengo.Ensemble(100, 1)
target = nengo.Ensemble(100, 1)
nengo.Connection(pre_value, pre)
# Direct connection; its initial function ignores x and returns a random scalar.
conn = nengo.Connection(pre, post, function=lambda x: np.random.random(),
learning_rule_type=nengo.PES())
# `wm` holds (pre, context); `target` receives their product.
wm = nengo.Ensemble(300, 2, radius=1.4)
context = nengo.Node(1)
nengo.Connection(context, wm[1])
nengo.Connection(pre, wm[0], synapse=tau_slow)
nengo.Connection(wm, target, synapse=tau_slow,
function=lambda x: x[0]*x[1])
error = nengo.Ensemble(n_neurons=100, dimensions=1)
nengo.Connection(post, error, synapse=tau_slow*2, transform=1) #Delay the fast connection so they line up
nengo.Connection(target, error, transform=-1)
nengo.Connection(error, conn.learning_rule)
# Constant-0 node wired to every error neuron with weight -10; presumably its
# value is raised interactively to suppress the error ensemble and stop learning.
stop_learn = nengo.Node([0])
nengo.Connection(stop_learn, error.neurons, transform=-10*np.ones((100,1)))
# Pass-through node collecting unfiltered `post` and `target` side by side.
both = nengo.Node(None, size_in=2) #For plotting
nengo.Connection(post, both[0], synapse=None)
nengo.Connection(target, both[1], synapse=None)
In [8]:
# Show `model` in the nengo_gui IPython viewer; 'learning3-consolidation.py.cfg'
# is passed as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'learning3-consolidation.py.cfg')
As mentioned in the last lecture, RL is a useful way to think about action selection. You have a set of actions and a set of states, and you figure out the value of each action in each state, letting you construct a big table $Q(s,a)$ which you can use to pick good actions. RL figures out what those values are through trial and error.
$\Delta Q(s,a) = \alpha (R + \gamma Q_{predicted} - Q_{old})$ where $R$ is reward, $\alpha$ is a learning rate and $\gamma$ is a discount factor.
In the model:
In [5]:
# requires CCMSuite https://github.com/tcstewar/ccmsuite/
import ccm.lib.grid
import ccm.lib.continuous
import ccm.ui.nengo
# ASCII map of the grid world; '#' characters become walls (see Cell.load).
# NOTE(review): the rows have different widths (9, 3, 6, ...), which looks
# like interior spaces were collapsed somewhere — confirm against the
# original notebook before relying on this layout.
mymap="""
#########
# #
# #
# ## #
# ## #
# #
#########
"""
class Cell(ccm.lib.grid.Cell):
    """Grid cell that can be flagged as a wall; walls are coloured black."""

    def color(self):
        """Return 'black' for a wall cell and None for any other cell."""
        if self.wall:
            return 'black'
        return None

    def load(self, char):
        """Flag this cell as a wall when its map character is '#'."""
        is_wall_char = (char == '#')
        if is_wall_char:
            self.wall = True
# Build the world from `mymap` with 4 heading directions, then place a
# continuous-motion body in it at x=1, y=3 with dir=2.
world = ccm.lib.grid.World(Cell, map=mymap, directions=4)
body = ccm.lib.continuous.Body()
world.add(body, x=1, y=3, dir=2)
import nengo
import numpy as np
def move(t, x):
    """Apply a (speed, rotation) command to the global ``body``; return a reward.

    ``x`` is unpacked as ``(speed, rotation)``. The body is turned and then
    moved forward by amounts scaled with a fixed step and maximum rates.
    Returns ``speed`` when ``go_forward`` reports success, and -1 when it
    does not (the body hit a wall).
    """
    dt = 0.001
    max_speed = 20.0
    max_rotate = 10.0
    speed, rotation = x
    body.turn(rotation * dt * max_rotate)
    moved = body.go_forward(speed * dt * max_speed)
    if moved:
        return speed
    # Hit a wall
    return -1
# Simple RL network: radar readings feed three PES-learned utility
# connections into a basal ganglia / thalamus action selector
# (forward, left, right); the reward returned by `move` trains the utilities.
model = nengo.Network("Simple RL", seed=2)
with model:
env = ccm.ui.nengo.GridNode(world, dt=0.005)
# `move` both drives the body and outputs the reward signal.
movement_node = nengo.Node(move, size_in=2, label='reward')
movement = nengo.Ensemble(n_neurons=100, dimensions=2, radius=1.4)
nengo.Connection(movement, movement_node)
# Three distance readings at headings dir-0.5, dir and dir+0.5 (wrapped to
# the number of world directions), each limited to max_distance=4.
def detect(t):
angles = (np.linspace(-0.5, 0.5, 3) + body.dir ) % world.directions
return [body.detect(d, max_distance=4)[0] for d in angles]
stim_radar = nengo.Node(detect)
radar = nengo.Ensemble(n_neurons=50, dimensions=3, radius=4)
nengo.Connection(stim_radar, radar)
bg = nengo.networks.actionselection.BasalGanglia(3)
thal = nengo.networks.actionselection.Thalamus(3)
nengo.Connection(bg.output, thal.input)
# Constant initial utilities for each action, independent of the radar input.
def u_fwd(x):
return 0.8
def u_left(x):
return 0.6
def u_right(x):
return 0.7
conn_fwd = nengo.Connection(radar, bg.input[0], function=u_fwd, learning_rule_type=nengo.PES())
conn_left = nengo.Connection(radar, bg.input[1], function=u_left, learning_rule_type=nengo.PES())
conn_right = nengo.Connection(radar, bg.input[2], function=u_right, learning_rule_type=nengo.PES())
# Map the selected action onto (speed, rotation): action 0 -> +speed,
# action 1 -> +rotation, action 2 -> -rotation.
nengo.Connection(thal.output[0], movement, transform=[[1],[0]])
nengo.Connection(thal.output[1], movement, transform=[[0],[1]])
nengo.Connection(thal.output[2], movement, transform=[[0],[-1]])
# One error ensemble per action: error = utility (bg.input) - reward.
errors = nengo.networks.EnsembleArray(n_neurons=50, n_ensembles=3)
nengo.Connection(movement_node, errors.input, transform=-np.ones((3,1)))
# Each BG output channel projects straight onto the neurons of the matching
# error ensemble with weight 4 (presumably gating which utility gets updated
# — TODO confirm the sign convention of bg.output).
nengo.Connection(bg.output[0], errors.ensembles[0].neurons, transform=np.ones((50,1))*4)
nengo.Connection(bg.output[1], errors.ensembles[1].neurons, transform=np.ones((50,1))*4)
nengo.Connection(bg.output[2], errors.ensembles[2].neurons, transform=np.ones((50,1))*4)
nengo.Connection(bg.input, errors.input, transform=1)
nengo.Connection(errors.ensembles[0], conn_fwd.learning_rule)
nengo.Connection(errors.ensembles[1], conn_left.learning_rule)
nengo.Connection(errors.ensembles[2], conn_right.learning_rule)
In [2]:
# Show `model` in the nengo_gui IPython viewer; 'learning5-utility.py.cfg' is
# passed as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'learning5-utility.py.cfg')
To improve our RL it would be good to predict future rewards more accurately. It would be good to learn the function $Q(s,a)$. Let's assume that your policy is fixed, so future actions are fixed. As well, future rewards are 90\% as good as current rewards (i.e. they are discounted). Consequently, we have
$Q(s,t) = R(s,t) + 0.9 R(s+1, t+1) + 0.9^2 R(s+2, t+2) + ...$.
So also,
$Q(s+1,t+1) = R(s+1,t+1) + 0.9 R(s+2, t+2) + 0.9^2 R(s+3, t+3) + ...$. $0.9 Q(s+1,t+1) = 0.9 R(s+1,t+1) + 0.9^2 R(s+2, t+2) + 0.9^3 R(s+3, t+3) + ...$.
Substituting this last equation into the first gives $Q(s,t) = R(s,t) + 0.9 Q(s+1, t+1)$. This suggests an error rule: $Error(t) = Q(s-1) - R(s-1) - 0.9 Q(s)$
In this model:
In [16]:
# requires CCMSuite https://github.com/tcstewar/ccmsuite/
import ccm.lib.grid
import ccm.lib.continuous
import ccm.ui.nengo
# ASCII map of the grid world: '#' is a wall, 'G' a +10 reward cell and
# 'R' a -10 reward cell (see Cell.load).
# NOTE(review): the rows have different widths (7, 3, 7, 7, 5, 7), which looks
# like interior spaces were collapsed somewhere — confirm against the
# original notebook before relying on this layout.
mymap="""
#######
# #
# # # #
# # # #
#G R#
#######
"""
class Cell(ccm.lib.grid.Cell):
    """Grid cell carrying a wall flag and a reward value read from the map."""

    def color(self):
        """Return the display colour for this cell.

        Walls are 'black'; otherwise positive reward is 'green', negative
        reward is 'red', and zero reward yields None.
        """
        if self.wall:
            return 'black'
        if self.reward > 0:
            return 'green'
        if self.reward < 0:
            return 'red'
        return None

    def load(self, char):
        """Initialise the cell from its map character.

        '#' flags a wall; 'G' sets reward 10, 'R' sets reward -10 and any
        other character leaves the reward at 0.
        """
        if char == '#':
            self.wall = True
        self.reward = {'G': 10, 'R': -10}.get(char, 0)
# Build the world from `mymap` with 4 heading directions, then place a
# continuous-motion body in it at x=1, y=2 with dir=2.
world = ccm.lib.grid.World(Cell, map=mymap, directions=4)
body = ccm.lib.continuous.Body()
world.add(body, x=1, y=2, dir=2)
import nengo
import numpy as np
# Time constant shared by the PES `pre_tau` and the slower synapses feeding
# the learning rule in the model below.
tau=0.1
def move(t, x):
    """Apply a (speed, rotation) command to the global ``body`` and toggle walls.

    ``x`` is unpacked as ``(speed, rotation)``. After turning and moving the
    body, the integer part of ``body.x`` selects which of the grid cells
    ``world.grid[4][4]`` / ``world.grid[4][2]`` is made a wall and which is
    opened. Returns None.
    """
    dt = 0.001
    max_speed = 20.0
    max_rotate = 10.0
    speed, rotation = x
    body.turn(rotation * dt * max_rotate)
    body.go_forward(speed * dt * max_speed)

    # body.x column -> (grid index to close, grid index to open)
    toggles = {
        1: ((4, 4), (4, 2)),
        4: ((4, 2), (4, 4)),
    }
    column = int(body.x)
    if column in toggles:
        (close_i, close_j), (open_i, open_j) = toggles[column]
        world.grid[close_i][close_j].wall = True
        world.grid[open_i][open_j].wall = False
# Value-prediction network: a fixed radar-driven policy steers the body while
# a PES-learned state -> value connection is trained with the error
# value(delayed) - reward(delayed) - 0.9 * value(current).
model = nengo.Network("Predict Value", seed=2)
with model:
env = ccm.ui.nengo.GridNode(world, dt=0.005)
movement = nengo.Node(move, size_in=2)
# Three distance readings at headings dir-0.5, dir and dir+0.5 (wrapped to
# the number of world directions), each limited to max_distance=4.
def detect(t):
angles = (np.linspace(-0.5, 0.5, 3) + body.dir) % world.directions
return [body.detect(d, max_distance=4)[0] for d in angles]
stim_radar = nengo.Node(detect)
radar = nengo.Ensemble(n_neurons=50, dimensions=3, radius=4, seed=2,
noise=nengo.processes.WhiteSignal(10, 0.1, rms=1))
nengo.Connection(stim_radar, radar)
# Fixed policy: turn by the difference of the two outer readings; speed is
# the middle reading minus 0.5.
def braiten(x):
turn = x[2] - x[0]
spd = x[1] - 0.5
return spd, turn
nengo.Connection(radar, movement, function=braiten)
# State vector: x and y rescaled by world width/height, plus dir divided by
# the number of directions.
def position_func(t):
return body.x / world.width * 2 - 1, 1 - body.y/world.height * 2, body.dir / world.directions
position = nengo.Node(position_func)
state = nengo.Ensemble(100, 3)
nengo.Connection(position, state, synapse=None)
reward = nengo.Node(lambda t: body.cell.reward)
value = nengo.Ensemble(n_neurons=50, dimensions=1)
# Value estimate starts at 0 everywhere and is learned with PES.
learn_conn = nengo.Connection(state, value, function=lambda x: 0,
learning_rule_type=nengo.PES(learning_rate=1e-4,
pre_tau=tau))
# The three inputs below sum at the learning rule: -reward and +value pass
# through the slow `tau` synapse, -0.9*value through the fast 0.01 synapse.
nengo.Connection(reward, learn_conn.learning_rule,
transform=-1, synapse=tau)
nengo.Connection(value, learn_conn.learning_rule,
transform=-0.9, synapse=0.01)
nengo.Connection(value, learn_conn.learning_rule,
transform=1, synapse=tau)
In [17]:
# Show `model` in the nengo_gui IPython viewer; 'learning6-value.py.cfg' is
# passed as the second argument (presumably the saved GUI layout/config file).
from nengo_gui.ipython import IPythonViz
IPythonViz(model,'learning6-value.py.cfg')
Hebbian learning
BCM rule (Bienenstock, Cooper, & Munro, 1982)
In [90]:
import nengo
# Baseline for the unsupervised-learning demo: a sine input drives `pre`,
# which projects to `post` through a connection solved with weights=True
# (full weight matrix) and no learning rule yet.
model = nengo.Network()
with model:
# `np` is not imported in this cell; it must already be in the namespace.
sin = nengo.Node(lambda t: np.sin(t*4))
pre = nengo.Ensemble(100, dimensions=1)
post = nengo.Ensemble(100, dimensions=1)
nengo.Connection(sin, pre)
conn = nengo.Connection(pre, post, solver=nengo.solvers.LstsqL2(weights=True))
pre_p = nengo.Probe(pre, synapse=0.01)
post_p = nengo.Probe(post, synapse=0.01)
# Build the simulator and run it for 2.0 s.
sim = nengo.Simulator(model)
sim.run(2.0)
In [91]:
# Decoded `pre` and `post` values from the run without a learning rule.
for probe, name in ((pre_p, "Pre"), (post_p, "Post")):
    plot(sim.trange(), sim.data[probe], label=name)
ylabel("Decoded value")
legend(loc="best");
In [93]:
# Attach the BCM rule to the existing pre -> post connection, probe the
# connection weights (one sample every 0.01 s), then rebuild and run 20.0 s.
conn.learning_rule_type = nengo.BCM(learning_rate=5e-10)
with model:
trans_p = nengo.Probe(conn, 'weights', synapse=0.01, sample_every=0.01)
sim = nengo.Simulator(model)
sim.run(20.0)
In [94]:
# Top panel: decoded `pre` and `post`; bottom panel: a slice of the probed
# connection weights over time.
figure(figsize=(12, 8))
subplot(2, 1, 1)
for probe, name in ((pre_p, "Pre"), (post_p, "Post")):
    plot(sim.trange(), sim.data[probe], label=name)
ylabel("Decoded value")
ylim(-1.6, 1.6)
legend(loc="lower left")

subplot(2, 1, 2)
# Find weight row with max variance.
# NOTE(review): the index is computed by averaging over axis 1 of the
# variance matrix but is then applied to the last axis of the weight data —
# confirm the intended axis.
weight_history = sim.data[trans_p]
variance_over_time = np.var(weight_history, axis=0)
neuron = np.argmax(np.mean(variance_over_time, axis=1))
plot(sim.trange(dt=0.01), weight_history[..., neuron])
ylabel("Connection weight");
In [95]:
def sparsity_measure(vector):
    """Return a rank-weighted sparsity score for ``vector``.

    Absolute values are sorted ascending, normalised by their total, and
    weighted so that larger entries get smaller weights. Equal magnitudes
    give 0; a one-hot vector of length n gives 1 - 1/n, so the score
    approaches 1 as a single entry dominates a long vector.
    """
    magnitudes = np.sort(np.abs(vector))
    count = magnitudes.shape[0]
    ranks = np.arange(1, count + 1)
    total = np.sum(magnitudes)
    rank_weights = (count - ranks + 0.5) / count
    weighted_sum = np.sum((magnitudes / total) * rank_weights)
    return 1 - 2 * weighted_sum
# Report the sparsity of the probed weights at the first and last samples.
# The parenthesised single-argument form is valid both as a Python 2 print
# statement and as a Python 3 print() call; the bare `print "..."` form is a
# SyntaxError on Python 3.
print("Starting sparsity: {0}".format(sparsity_measure(sim.data[trans_p][0])))
print("Ending sparsity: {0}".format(sparsity_measure(sim.data[trans_p][-1])))
$\Delta \omega_{ij} = \kappa \alpha_j a_i (S e_j \cdot E + (1-S) a_j (a_j-\theta))$
Works as well as (or better than) PES
In [ ]: