EXP 2-HighOrder

In this experiment we generate 1000 high-order sequences, each comprising 10 SDRs. The sequences are generated as follows: we create a sequence S of 10 random SDRs, then create a sequence S' by replacing the first n and last n SDRs of S with 2n newly generated random SDRs. Repeating this process 500 times yields 1000 sequences. We present these sequences to the TM with learning enabled. Each training epoch shuffles the 1000 sequences and presents each of them to the TM. During the simulation we record the spike trains of all cells and use them to estimate pairwise correlations among cells.
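
The pairing scheme can be sketched as follows. This is only an illustration; the notebook itself uses generateRandomSequence and generateHOSequence from neural_correlations_utils, and the helpers randomSDR and makeHOPair below are hypothetical:

import numpy as np

def randomSDR(numColumns, sparsity):
    # hypothetical helper: active column indices of one random SDR
    numActive = int(numColumns * sparsity)
    return set(np.random.permutation(numColumns)[:numActive])

def makeHOPair(seqLength, numColumns, sparsity, n=1):
    # S: a fresh sequence of seqLength random SDRs
    S = [randomSDR(numColumns, sparsity) for _ in range(seqLength)]
    # S': identical to S except that the first n and last n SDRs are replaced
    # with 2n newly drawn SDRs, so only high-order context disambiguates S and S'
    Sprime = list(S)
    for i in range(n):
        Sprime[i] = randomSDR(numColumns, sparsity)
        Sprime[seqLength - 1 - i] = randomSDR(numColumns, sparsity)
    return S, Sprime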


In [1]:
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from nupic.bindings.algorithms import TemporalMemory as TM
from htmresearch.support.neural_correlations_utils import *
    
uintType = "uint32"
random.seed(1)

In [3]:
symbolsPerSequence = 10
numSequences = 1000
epochs = 10
totalTS = epochs * numSequences * symbolsPerSequence

In [4]:
tm = TM(columnDimensions = (2048,),
    cellsPerColumn=8,
    initialPermanence=0.21,
    connectedPermanence=0.3,
    minThreshold=15,
    maxNewSynapseCount=40,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    activationThreshold=15,
    predictedSegmentDecrement=0.01,
    )

sparsity = 0.02
sparseCols = int(tm.numberOfColumns() * sparsity)

Feed sequences to the TM


In [5]:
# Create the training set: pairs of sequences that share most of their SDRs
# but differ at the beginning and end, so high-order context is needed to
# disambiguate them.
allSequences = []

for s in range(numSequences):
    if s % 2 == 0:
        # even index: brand-new random sequence S
        sequence = generateRandomSequence(symbolsPerSequence, tm.numberOfColumns(), sparsity)
        allSequences.append(sequence)
    else:
        # odd index: high-order variant S' of the sequence generated just above
        sequenceHO = generateHOSequence(sequence, symbolsPerSequence, tm.numberOfColumns(), sparsity)
        allSequences.append(sequenceHO)

In [6]:
spikeTrains = np.zeros((tm.numberOfCells(), totalTS), dtype = "uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
spikeCount = np.zeros(totalTS, dtype="uint32")
ts = 0

entropyX = []
entropyY = []

negPCCX_cells = []
negPCCY_cells = []

numSpikesX = []
numSpikesY = []

numSpikes = 0

negPCCX_cols = []
negPCCY_cols = []

traceX = []
traceY = []


# Randomly pick the column indices to keep track of during the simulation
colIndicesLarge = np.random.permutation(tm.numberOfColumns())[0:125] # keep track of 125 columns = 1000 cells

for epoch in range(epochs):
    # shuffle sequences
    print ""
    print "Epoch: " + str(epoch)
    seqIndices = np.random.permutation(np.arange(numSequences))
    
    for s in range(numSequences):
        tm.reset()
        if s > 0 and s % 100 == 0:
            print str(s) + " sequences processed"
        for symbol in range(symbolsPerSequence):
            tm.compute(allSequences[seqIndices[s]][symbol], learn=True)
            
            for cell in tm.getActiveCells():
                spikeTrains[cell, ts] = 1
                numSpikes += 1
                spikeCount[ts] += 1
                
            # Obtain active columns:
            activeColumnsIndices = [tm.columnForCell(i) for i in tm.getActiveCells()]
            currentColumns = [1 if i in activeColumnsIndices else 0 for i in range(tm.numberOfColumns())]
            for col in np.nonzero(currentColumns)[0]:
                columnUsage[col] += 1
                
            if ts > 0 and ts % int(totalTS * 0.1) == 0:
                numSpikesX.append(ts)
                numSpikesY.append(numSpikes)            
                numSpikes = 0
                
                #print "++ Analyzing correlations (cells at random) ++"                
                subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), ts, 1000)
                (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
                negPCCX_cells.append(ts)
                negPCCY_cells.append(numNegPCC)                
                bins = 300
                plt.hist(corrMatrix.ravel(), bins, alpha=0.5)                
                plt.xlim(-0.05,0.1)
                plt.xlabel("PCC")
                plt.ylabel("Frequency")
                plt.savefig("cellsHist_" + str(ts))
                plt.close()

                traceX.append(ts)
                #traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.5))
                #traceY.append(np.std(corrMatrix))
                #traceY.append(sum(1 for i in corrMatrix.ravel() if i > -0.05 and i < 0.1))
                traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.0))
                
                entropyX.append(ts)
                entropyY.append(computeEntropy(subSpikeTrains))   

                #print "++ Analyzing correlations (whole columns) ++"
                ### First the LARGE subsample of columns:
                subSpikeTrains = subSampleWholeColumn(spikeTrains, colIndicesLarge, tm.getCellsPerColumn(), ts, 1000)
                (corrMatrix, numNegPCC) = computePWCorrelationsWithinCol(subSpikeTrains, True, tm.getCellsPerColumn())
                negPCCX_cols.append(ts)
                negPCCY_cols.append(numNegPCC)                
                #print "++ Generating histogram ++"
                plt.hist(corrMatrix.ravel(), alpha=0.5)
                plt.xlabel("PCC")
                plt.ylabel("Frequency")
                plt.savefig("colsHist_" + str(ts))
                plt.close()                 
                
                
            ts += 1
                        
print "*** DONE ***"


Epoch: 0
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 1
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 2
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 3
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 4
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 5
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 6
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 7
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 8
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed

Epoch: 9
100 sequences processed
200 sequences processed
300 sequences processed
400 sequences processed
500 sequences processed
600 sequences processed
700 sequences processed
800 sequences processed
900 sequences processed
*** DONE ***

In [8]:
plt.plot(traceX, traceY)
plt.xlabel("Time")
plt.ylabel("Positive PCC Count")
plt.savefig("positivePCCTrace")
plt.close()

In [ ]:
# Sparsity over time: mean spike count in a sliding 1000-timestep window,
# normalized by the total number of cells
sparsityTraceX = []
sparsityTraceY = []
for i in range(totalTS - 1000):
    sparsityTraceX.append(i)
    sparsityTraceY.append(np.mean(spikeCount[i:1000 + i]) / tm.numberOfCells())

In [ ]:
plt.plot(sparsityTraceX, sparsityTraceY)
plt.xlabel("Time")
plt.ylabel("Sparsity")
plt.savefig("sparsityTrace")
plt.close()

In [ ]:
# plot trace of negative PCCs
plt.plot(negPCCX_cells, negPCCY_cells)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cells")
plt.close()

plt.plot(negPCCX_cols, negPCCY_cols)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cols")
plt.close()

In [ ]:
plt.plot(numSpikesX, numSpikesY)
plt.xlabel("Time")
plt.ylabel("Num Spikes")
plt.savefig("numSpikesTrace")
plt.close()

In [ ]:
# plot entropy
plt.plot(entropyX, entropyY)
plt.xlabel("Time")
plt.ylabel("Entropy")
plt.savefig("entropyTM")
plt.close()

In [ ]:
plt.hist(columnUsage)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage")
plt.close()

ISI analysis (with Poisson model too)
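
Roughly, the ISI of a cell is the number of timesteps between its consecutive spikes, and the Poisson control draws independent spikes at a matched mean firing rate. A minimal sketch of both, assuming this is what computeISI and poissonSpikeGenerator (from neural_correlations_utils) compute; the helper names below are hypothetical:

import numpy as np

def isiSketch(spikeTrains):
    # simplified stand-in for computeISI: pool inter-spike intervals over all cells
    isis = []
    for cell in range(spikeTrains.shape[0]):
        spikeTimes = np.nonzero(spikeTrains[cell, :])[0]
        if len(spikeTimes) > 1:
            isis.extend(np.diff(spikeTimes))
    return np.array(isis)

def poissonSketch(firingRate, nTimesteps, nCells, dt=0.001):
    # simplified stand-in for poissonSpikeGenerator: independent Bernoulli spikes
    # with probability firingRate * dt per timestep (dt assumed to be 1 ms)
    return (np.random.rand(nCells, nTimesteps) < firingRate * dt).astype("uint32")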


In [ ]:
subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), 0, 0)

In [ ]:
isi = computeISI(subSpikeTrains)

In [ ]:
# Plot ISI distribution of TM

#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 100
plt.hist(isi, bins)
plt.xlim(0,1000)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiTM")
plt.close()

In [ ]:
print np.mean(isi)
print np.std(isi)
print np.std(isi)/np.mean(isi)  # coefficient of variation (CV) of the ISIs

In [ ]:
# Spike-count distribution: number of spikes per cell in the TM sample
spikeCount = []
for cell in range(np.shape(subSpikeTrains)[0]):
    spikeCount.append(np.count_nonzero(subSpikeTrains[cell,:]))

In [ ]:
bins = 25
plt.hist(spikeCount, bins)
plt.xlabel("Spike Count")
plt.ylabel("Number of cells")
plt.savefig("spikesHistTM")
plt.close()

In [ ]:
#firingRate = 18
firingRate = np.mean(subSpikeTrains) * 1000  # mean rate in Hz, assuming 1 ms timesteps
print "firing rate: " + str(firingRate)
pSpikeTrain = poissonSpikeGenerator(firingRate,np.shape(subSpikeTrains)[1],np.shape(subSpikeTrains)[0])

In [ ]:
isi = computeISI(pSpikeTrain)

In [ ]:
# Plot ISI distribution of Poisson model
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 100
plt.hist(isi, bins)
plt.xlim(0,600)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiPOI")
plt.close()

In [ ]:
print np.mean(isi)
print np.std(isi)
print np.std(isi)/np.mean(isi)  # coefficient of variation (CV) of the ISIs

In [ ]:
# Spike-count distribution: number of spikes per cell in the Poisson sample
spikeCount = []
for cell in range(np.shape(pSpikeTrain)[0]):
    spikeCount.append(np.count_nonzero(pSpikeTrain[cell,:]))

In [ ]:
bins = 25
plt.hist(spikeCount, bins)
plt.xlabel("Spike Count")
plt.ylabel("Number of cells")
plt.savefig("spikesHistPOI")
plt.close()

Raster Plots


In [ ]:
subSpikeTrains = subSample(spikeTrains, 100, tm.numberOfCells(), -1, 1000)

In [ ]:
rasterPlot(subSpikeTrains, "TM")

In [ ]:
pSpikeTrain = poissonSpikeGenerator(firingRate,np.shape(subSpikeTrains)[1],np.shape(subSpikeTrains)[0])

In [ ]:
rasterPlot(pSpikeTrain, "Poisson")

Quick Accuracy Test


In [ ]:
simpleAccuracyTest("random", tm, allSequences)

Elad Plot


In [ ]:
# Sample from both TM_SpikeTrains and Poisson_SpikeTrains. 10 cells for 1000 (?) timesteps
import itertools

wordLength = 10
firingRate = np.mean(subSpikeTrains) * 1000

# generate all 2^N strings:
binaryStrings = list(itertools.product([0, 1], repeat=wordLength))
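
The loop in the next cell counts, for every possible 10-bit word, how often that exact activity pattern appears across the sampled cells, comparing the TM sample (observed) against the Poisson sample (predicted). A minimal sketch of the counting step, assuming countInSample scans timesteps for exact matches of the word; countWordSketch below is hypothetical, the real countInSample comes from neural_correlations_utils:

import numpy as np

def countWordSketch(word, spikeTrains):
    # simplified stand-in for countInSample: number of timesteps at which the
    # activity of the sampled cells exactly equals the given binary word
    count = 0
    for t in range(spikeTrains.shape[1]):
        if np.array_equal(spikeTrains[:, t], word):
            count += 1
    return count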

In [ ]:
trials = 10

x = [] #observed
y = [] #predicted by random model

for t in range(trials):
    print "Trial: " + str(t)
    # sample from spike trains
    subSpikeTrains = subSample(spikeTrains, wordLength, tm.numberOfCells(), 0, 0)
    pSpikeTrain = poissonSpikeGenerator(firingRate,np.shape(subSpikeTrains)[1],np.shape(subSpikeTrains)[0])    
    for i in range(2**wordLength):
        if i == 0:
            continue
#         if i % 100 == 0:
#             print str(i) + " words processed"
        binaryWord = np.array(binaryStrings[i], dtype="uint32")
        x.append(countInSample(binaryWord, subSpikeTrains))
        y.append(countInSample(binaryWord, pSpikeTrain))
#     print "**All words processed**"    
#     print ""
print "*** DONE ***"

In [ ]:
plt.loglog(x, y, 'bo',basex=10)
plt.xlabel("Observed")
plt.ylabel("Predicted")
plt.plot(x,x,'k-')
plt.xlim(0,np.max(x))
plt.savefig("EladPlot")
plt.close()

Save TM


In [ ]:
saveTM(tm)

In [ ]:
# to load the TM back from the file do:
with open('tm.nta', 'rb') as f:
    proto2 = TemporalMemoryProto_capnp.TemporalMemoryProto.read(f, traversal_limit_in_words=2**61)
tm = TM.read(proto2)

Analysis of input


In [ ]:
overlapMatrix = inputAnalysis(allSequences, "random", tm.numberOfColumns())
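
The overlap score between two input SDRs is the fraction of active columns they share. A minimal sketch, assuming the score is the intersection size normalized by the smaller SDR; overlapScoreSketch below is hypothetical, and the actual percentOverlap and inputAnalysis from neural_correlations_utils may differ in detail:

def overlapScoreSketch(sdr1, sdr2):
    # sdr1, sdr2: iterables of active column indices
    shared = len(set(sdr1) & set(sdr2))
    return float(shared) / min(len(set(sdr1)), len(set(sdr2)))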

In [ ]:
# show heatmap of overlap matrix
plt.imshow(overlapMatrix, cmap='spectral', interpolation='nearest')
cb = plt.colorbar()
cb.set_label('Overlap Score')
plt.savefig("overlapScore_heatmap")
plt.close()
# plt.show()

# generate histogram
bins = 60
(n, bins, patches) = plt.hist(overlapMatrix.ravel(), bins, alpha=0.5)

plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist")

plt.xlim(0.1,1.0)
plt.ylim(0,200000)
plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist_ZOOM")
plt.close()

In [ ]:
# Look for a pair of input SDRs with an overlap score of 1
flag = False
for i in range(numSequences*symbolsPerSequence):
    for j in range(numSequences*symbolsPerSequence):
        if overlapMatrix[i,j] == 1:
            print i,j
            flag = True
            break
    if flag:
        break

In [ ]:
print overlapMatrix[1,11]

In [ ]:
print allSequences[0][1]
print allSequences[1][1]
print percentOverlap(allSequences[0][1], allSequences[1][1], tm.numberOfColumns())
