In [1]:
import numpy as np
import numpy.random as rng
from pylab import *  # the bare subplot/imshow/sqrt/mod calls below rely on the pylab namespace
np.set_printoptions(precision=3)

def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

# Module-level globals used throughout: W, Hbias, Vbias, Nitems, Nhid, N
# NB. The visible layer is an N-by-N image.

def VtoH(V):
    # Sample the hidden layer given visible states: P(h=1|v) = sigmoid(v.W^T + Hbias)
    nitems = V.shape[0]
    prH1 = sigmoid(np.dot(V, W.transpose()) + Hbias)
    r = rng.rand(nitems, Nhid)
    return np.where(prH1 > r, 1, 0)

def HtoV(H):
    # Sample the visible layer given hidden states: P(v=1|h) = sigmoid(h.W + Vbias)
    nitems = H.shape[0]
    prV1 = sigmoid(np.dot(H, W) + Vbias)
    r = rng.rand(nitems, N*N)
    return np.where(prV1 > r, 1, 0)
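
A minimal shape check of the two samplers (an addition, not in the original notebook; it assumes the initialisation cell further down has been run, so that W, Vbias, Hbias, N and Nhid all exist):

In [ ]:
# Sanity check: VtoH maps (nitems, N*N) -> (nitems, Nhid) binary samples, and HtoV maps back.
V = np.where(rng.rand(3, N*N) > 0.5, 1, 0)  # a batch of 3 random binary images
H = VtoH(V)
assert H.shape == (3, Nhid)
assert HtoV(H).shape == (3, N*N)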

In [15]:
def show_weights():
    # One N-by-N weight image per hidden unit, tiled into a near-square grid.
    W_per_h = W.reshape(Nhid, N, N)
    nc = int(sqrt(Nhid))
    nr = int(np.ceil(Nhid / nc))  # enough rows to fit every hidden unit
    i = 0
    for r in range(nr):
        for c in range(nc):
            if i < Nhid:          # guard: the grid may have more cells than units
                subplot(nr, nc, i+1)
                imshow(W_per_h[i], interpolation='nearest', cmap='RdYlGn')
                axis('off')
                title('hid %d' % i)
            i = i + 1

def show_biases():
    subplot(2,1,1)
    imshow(Hbias.reshape(1,Nhid),interpolation='nearest', cmap='RdYlGn')
    axis('off')
    title('bias to hidden layer')
    subplot(2,1,2)
    imshow(Vbias.reshape(N,N),interpolation='nearest', cmap='RdYlGn')
    axis('off')
    title('bias to visible layer')

In [50]:
N = 20 # The visible layer corresponds to an N-by-N image.

In [51]:
# Make a data set of visible patterns: one square blob per item, plus some spurious on-pixels.
Nitems = 1000
num_spurions = 1
blob_radius = 4 # does not include the centre, e.g. a blob_radius of 1 means a blob 3 pixels across.
tmpV = np.zeros((Nitems,N,N),dtype=int)
for n in range(Nitems):
    r,c = rng.randint(blob_radius,N-blob_radius), rng.randint(blob_radius,N-blob_radius)
    for i in range(r-blob_radius,r+blob_radius+1,1):
        for j in range(c-blob_radius,c+blob_radius+1,1):
            tmpV[n,i,j] = 1 
    for k in range(num_spurions):
        r,c = rng.randint(N), rng.randint(N)
        tmpV[n,r,c] = 1 
        
wakeV = np.reshape(tmpV,(Nitems,N*N))
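
A quick consistency check on the construction (an addition, not part of the original): each item should contain a blob of (2*blob_radius+1)^2 on-pixels, plus at most num_spurions extras, since a spurious pixel can land inside the blob:

In [ ]:
counts = wakeV.sum(axis=1)
expected = (2*blob_radius + 1)**2
print(counts.min(), counts.max())  # both should lie in [expected, expected + num_spurions]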

In [52]:
# Show examples from the training set
nr, nc = 5,8
i=0
for r in range(nr):
    for c in range(nc):
        subplot(nr, nc, i+1)
        imshow(np.reshape(wakeV[i],(N,N)), interpolation='nearest', cmap='copper', vmin=0, vmax=1)
        axis('off')
        i=i+1



In [53]:
# initialise a network, with weights and biases
Nhid=50 # num units in the hidden layer. 

W = 0.1*(rng.random((Nhid,N*N)) - 0.5) # shape (Nhid, N*N): one weight per (hidden unit, visible pixel)
Vbias = 0.1*(rng.random(N*N) - 0.5)    # one bias per visible pixel
Hbias = 0.1*(rng.random(Nhid) - 0.5)   # one bias per hidden unit
show_weights()
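
This network is a restricted Boltzmann machine, so one scalar worth tracking as training proceeds (an addition, not in the original notebook) is the free energy of a batch of visible vectors, F(v) = -v.Vbias - sum_j log(1 + exp((v W^T + Hbias)_j)):

In [ ]:
def free_energy(V):
    # Free energy per item; lower means the model assigns the pattern higher probability.
    x = np.dot(V, W.transpose()) + Hbias  # hidden pre-activations, shape (nitems, Nhid)
    return -np.dot(V, Vbias) - np.logaddexp(0, x).sum(axis=1)  # numerically stable log(1 + e^x)

print(free_energy(wakeV[:5]))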



In [54]:
# learning via persistent contrastive divergence (PCD)
learnRate = 0.3  # a magic number!
sleepV = wakeV.copy()  # persistent "fantasy" particles, initialised at the data
sleepH = VtoH(sleepV)
momentum, prev_W_change = 0.5, 0  # simple momentum on the weight updates
for t in range(1001):
    if t % 100 == 0: print(t)
    wakeH = VtoH(wakeV)    # samples from the hiddens in the wake phase
    sleepV = HtoV(sleepH)
    sleepH = VtoH(sleepV)  # one step of alternating Gibbs sampling in the sleep phase

    # Hebbian learning for the wake phase, anti-Hebbian for the sleep phase:
    # <v_i h_j> correlations summed over items; both terms have shape (Nhid, N*N).
    Hebb = np.dot(wakeH.transpose(), wakeV)
    antiHebb = np.dot(sleepH.transpose(), sleepV)

    W_change = learnRate/Nitems * (Hebb - antiHebb)
    W = W + W_change + momentum*prev_W_change
    prev_W_change = W_change
    Vbias = Vbias + learnRate/Nitems * (wakeV - sleepV).sum(0)
    Vbias = np.mean(Vbias) * np.ones(Vbias.shape)  # NB: ties every visible bias to the mean -- a bit weird, isn't it?
    Hbias = Hbias + learnRate/Nitems * (wakeH - sleepH).sum(0)
    Hbias = np.mean(Hbias) * np.ones(Hbias.shape)  # likewise for the hidden biases
show_weights()


0
100
200
300
400
500
600
700
800
900
1000
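
Another easy diagnostic (an addition, not in the original; recon_err is a name chosen here for illustration) is the one-step reconstruction error on the training data, which should fall as the weights improve:

In [ ]:
recon = HtoV(VtoH(wakeV))                   # one up-down pass through the model
recon_err = np.mean(np.abs(wakeV - recon))  # mean per-pixel disagreement
print('mean per-pixel reconstruction error: %.3f' % recon_err)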

In [49]:
# Show some samples, each started from a random initial condition.
nr, nc = 5,8
testV = np.where(rng.random((nr*nc,N*N)) > 0.5, 1, 0)
for t in range(50):
    testV = HtoV(VtoH(testV)) # one step of alternating Gibbs sampling
i=0
for r in range(nr):
    for c in range(nc):
        subplot(nr, nc, i+1)
        imshow(np.reshape(testV[i],(N,N)), interpolation='nearest', cmap='copper', vmin=0, vmax=1)
        axis('off')
        i=i+1


Save the learned weights and biases to disk:


In [11]:
W.dump('B_Wgts')  # ndarray.dump pickles the array to file
Hbias.dump('B_hBias')
Vbias.dump('B_vBias')
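
The dump files are pickles, so they can be read back with np.load (allow_pickle=True is required for pickled arrays):

In [ ]:
W = np.load('B_Wgts', allow_pickle=True)
Hbias = np.load('B_hBias', allow_pickle=True)
Vbias = np.load('B_vBias', allow_pickle=True)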