In [1]:
import numpy as np
import numpy.random as rng
from matplotlib.pyplot import subplot, imshow, axis, title
np.set_printoptions(precision=3)

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

# Global network parameters. NB. the visible layer is an N-by-N image.
global W, Hbias, Vbias, Nitems, Nhid, N

def VtoH(V):
    # Sample binary hidden states given visible states V (nitems x N*N).
    nitems = V.shape[0]
    prH1 = sigmoid(np.dot(V, W.transpose()) + Hbias)
    r = rng.rand(nitems, Nhid)
    return np.where(prH1 > r, 1, 0)

def HtoV(H):
    # Sample binary visible states given hidden states H (nitems x Nhid).
    nitems = H.shape[0]
    prV1 = sigmoid(np.dot(H, W) + Vbias)
    r = rng.rand(nitems, N*N)
    return np.where(prV1 > r, 1, 0)
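A quick illustration (added here, not part of the original run): the sigmoid squashes any real input into (0, 1), which is what lets us treat the activations as Bernoulli firing probabilities in `VtoH` and `HtoV`.

In [ ]:
# sigmoid maps large negative inputs near 0, zero to 0.5, large positive near 1
print(sigmoid(np.array([-5.0, 0.0, 5.0])))   # ~ [0.007, 0.5, 0.993]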
In [15]:
def show_weights():
    # Display each hidden unit's weights as an N-by-N image.
    W_per_h = W.reshape(Nhid, N, N)
    nc = int(np.sqrt(Nhid))
    nr = int(np.ceil(Nhid / nc))   # enough rows to show every hidden unit
    i = 0
    for r in range(nr):
        for c in range(nc):
            if i < Nhid:
                subplot(nr, nc, i+1)
                imshow(W_per_h[i], interpolation='nearest', cmap='RdYlGn')
                axis('off')
                title('hid %d' % i)
                i = i + 1

def show_biases():
    subplot(2, 1, 1)
    imshow(Hbias.reshape(1, Nhid), interpolation='nearest', cmap='RdYlGn')
    axis('off')
    title('bias to hidden layer')
    subplot(2, 1, 2)
    imshow(Vbias.reshape(N, N), interpolation='nearest', cmap='RdYlGn')
    axis('off')
    title('bias to visible layer')
In [50]:
N = 20   # the visible layer will correspond to an N-by-N image
In [51]:
# Make a data set of visible patterns: one square blob per image,
# plus a few spurious 'on' pixels as noise.
Nitems = 1000
num_bars = 1        # unused in this blob data set
num_spurions = 1    # number of random noise pixels per image
blob_radius = 4     # excludes the centre pixel, so blob_radius = 1 gives blobs 3 pixels across
tmpV = np.zeros((Nitems, N, N), dtype=int)
for n in range(Nitems):
    r, c = rng.randint(blob_radius, N-blob_radius), rng.randint(blob_radius, N-blob_radius)
    for i in range(r-blob_radius, r+blob_radius+1):
        for j in range(c-blob_radius, c+blob_radius+1):
            tmpV[n, i, j] = 1
    for k in range(num_spurions):
        r, c = rng.randint(N), rng.randint(N)
        tmpV[n, r, c] = 1
wakeV = np.reshape(tmpV, (Nitems, N*N))
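A quick sanity check on the data (added here, not in the original notebook): each image should contain one (2*blob_radius+1)^2 blob plus up to `num_spurions` noise pixels.

In [ ]:
# With blob_radius = 4 the blob covers 9*9 = 81 of the 400 pixels, so the
# average number of 'on' pixels per image should be just over 81.
print(wakeV.sum(axis=1).mean(), 'on-pixels per image on average')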
In [52]:
# Show examples from the training set
nr, nc = 5, 8
i = 0
for r in range(nr):
    for c in range(nc):
        subplot(nr, nc, i+1)
        imshow(np.reshape(wakeV[i], (N, N)), interpolation='nearest', cmap='copper', vmin=0, vmax=1)
        axis('off')
        i = i + 1
In [53]:
# Initialise a network with small random weights and biases.
Nhid = 50   # number of units in the hidden layer
W = 0.1 * (rng.random((Nhid, N*N)) - 0.5)   # one row per hidden unit, one column per visible pixel
Vbias = 0.1 * (rng.random(N*N) - 0.5)       # one bias per visible pixel
Hbias = 0.1 * (rng.random(Nhid) - 0.5)      # one bias per hidden unit
show_weights()
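For reference (a hedged addition, not part of the original notebook): the joint energy this model assigns to a configuration is E(v, h) = -v.Vbias - h.Hbias - h.W.v, and lower energy means higher probability under the model. A minimal sketch, assuming the globals defined above:

In [ ]:
# Illustrative helper: RBM energy for a batch of (visible, hidden) pairs.
def energy(V, H):
    return (- np.dot(V, Vbias)
            - np.dot(H, Hbias)
            - np.einsum('ih,hv,iv->i', H, W, V))
# at initialisation the energies are near zero; training should lower them
print(energy(wakeV[:5], VtoH(wakeV[:5])))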
In [54]:
# Learning via persistent contrastive divergence (PCD):
# the sleep-phase Gibbs chain persists across parameter updates.
learnRate = 0.3   # a magic number!
sleepV = wakeV.copy()
sleepH = VtoH(sleepV)
momentum, prev_W_change = 0.5, 0
for t in range(1001):
    if t % 100 == 0: print(t)
    wakeH = VtoH(wakeV)     # samples from hiddens in the wake phase
    sleepV = HtoV(sleepH)
    sleepH = VtoH(sleepV)   # one step of alternating Gibbs sampling in the sleep phase
    # Hebbian learning for the wake phase, anti-Hebbian for the sleep phase
    Hebb = wakeV.transpose().reshape(N*N, Nitems, 1) * wakeH.reshape(1, Nitems, Nhid)
    antiHebb = sleepV.transpose().reshape(N*N, Nitems, 1) * sleepH.reshape(1, Nitems, Nhid)
    W_change = learnRate/Nitems * (Hebb - antiHebb).sum(1).transpose()
    W = W + W_change + momentum * prev_W_change
    prev_W_change = W_change
    Vbias = Vbias + learnRate/Nitems * (wakeV - sleepV).sum(0)
    Vbias = np.mean(Vbias) * np.ones(Vbias.shape)   # tie all visible biases to their mean (a bit weird, isn't it...)
    Hbias = Hbias + learnRate/Nitems * (wakeH - sleepH).sum(0)
    Hbias = np.mean(Hbias) * np.ones(Hbias.shape)   # ...and likewise for the hidden biases
show_weights()
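One way to watch training progress (a sketch added here, not in the original run) is the one-step reconstruction error on the training data. It is not the objective PCD optimises, but it usually falls as learning proceeds.

In [ ]:
# Fraction of pixels that flip after one up-down Gibbs pass over the data.
reconV = HtoV(VtoH(wakeV))
print('one-step reconstruction error:', np.mean(reconV != wakeV))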
In [49]:
# Show some samples, each starting from a random initial condition.
nr, nc = 5, 8
testV = np.where(rng.random((nr*nc, N*N)) > 0.5, 1, 0)
for t in range(50):
    testV = HtoV(VtoH(testV))   # one full up-down step of alternating Gibbs sampling
i = 0
for r in range(nr):
    for c in range(nc):
        subplot(nr, nc, i+1)
        imshow(np.reshape(testV[i], (N, N)), interpolation='nearest', cmap='copper', vmin=0, vmax=1)
        axis('off')
        i = i + 1
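The same machinery supports pattern completion (a hedged sketch, not from the original run): clamp the known pixels of an image and let Gibbs sampling fill in the rest.

In [ ]:
# Illustrative sketch: occlude the bottom half of a training image, then
# repeatedly re-clamp the observed top half while Gibbs sampling fills in
# the occluded pixels.
probe = wakeV[:1].copy()
known = np.zeros(N*N, dtype=bool)
known[:N*N//2] = True                      # top half of the image is observed
completed = np.where(known, probe, 0)
for t in range(50):
    completed = HtoV(VtoH(completed))
    completed = np.where(known, probe, completed)   # re-clamp observed pixels
imshow(completed.reshape(N, N), interpolation='nearest', cmap='copper')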
In [11]:
# Save the learned parameters (ndarray.dump pickles each array to a file).
W.dump('B_Wgts')
Hbias.dump('B_hBias')
Vbias.dump('B_vBias')
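To reload the saved parameters in a later session (a hedged addition: `allow_pickle=True` is needed because the dumped files are pickles):

In [ ]:
W = np.load('B_Wgts', allow_pickle=True)
Hbias = np.load('B_hBias', allow_pickle=True)
Vbias = np.load('B_vBias', allow_pickle=True)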