In this experiment we generate 1000 high-order sequences, each comprising 10 SDRs. Sequences are generated as follows: we create a sequence S of 10 random SDRs, then derive a sequence S' by replacing the first n and last n SDRs of S with 2n freshly generated random SDRs, so that S and S' share their middle subsequence. Repeating this process 500 times yields 1000 sequences. We present these sequences to the TM with learning on; each training epoch starts by shuffling the 1000 sequences and presenting each of them to the TM. Throughout the simulation we record spike trains from all cells and use this data to estimate pairwise correlations among cells.
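The generator helpers used below come from htmresearch.support.neural_correlations_utils. As a minimal sketch of the substitution step described above (not the library's actual generateHOSequence, whose signature differs; makeHOSequence and its parameter n are hypothetical names), assuming each SDR is an array of active column indices:
In [ ]:
import numpy as np

def randomSDR(numColumns, sparsity):
    # A random set of active column indices at the given sparsity.
    numActive = int(numColumns * sparsity)
    return np.random.permutation(numColumns)[:numActive]

def makeHOSequence(sequence, numColumns, sparsity, n=3):
    # Hypothetical sketch: replace the first n and last n SDRs of
    # `sequence` with 2n fresh random SDRs. The shared middle is what
    # forces the TM to rely on high-order (multi-step) context to
    # disambiguate the two sequences.
    head = [randomSDR(numColumns, sparsity) for _ in range(n)]
    tail = [randomSDR(numColumns, sparsity) for _ in range(n)]
    return head + list(sequence[n:-n]) + tail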
In [1]:
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from nupic.bindings.algorithms import TemporalMemory as TM
from htmresearch.support.neural_correlations_utils import *
uintType = "uint32"
random.seed(1)
In [3]:
symbolsPerSequence = 10
numSequences = 1000
epochs = 10
totalTS = epochs * numSequences * symbolsPerSequence
In [4]:
tm = TM(columnDimensions=(2048,),
        cellsPerColumn=8,
        initialPermanence=0.21,
        connectedPermanence=0.3,
        minThreshold=15,
        maxNewSynapseCount=40,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        activationThreshold=15,
        predictedSegmentDecrement=0.01)
sparsity = 0.02
sparseCols = int(tm.numberOfColumns() * sparsity)
In [5]:
# Create sequences: even-indexed entries are fully random; each odd-indexed
# entry is a high-order variant of the sequence generated just before it.
allSequences = []
for s in range(numSequences):
    if s % 2 == 0:
        sequence = generateRandomSequence(symbolsPerSequence, tm.numberOfColumns(), sparsity)
        allSequences.append(sequence)
    else:
        sequenceHO = generateHOSequence(sequence, symbolsPerSequence, tm.numberOfColumns(), sparsity)
        allSequences.append(sequenceHO)
In [6]:
spikeTrains = np.zeros((tm.numberOfCells(), totalTS), dtype = "uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
spikeCount = np.zeros(totalTS, dtype="uint32")
ts = 0
entropyX = []
entropyY = []
negPCCX_cells = []
negPCCY_cells = []
numSpikesX = []
numSpikesY = []
numSpikes = 0
negPCCX_cols = []
negPCCY_cols = []
traceX = []
traceY = []
# Randomly generate the indices of the columns to keep track during simulation time
colIndicesLarge = np.random.permutation(tm.numberOfColumns())[0:125] # keep track of 125 columns = 1000 cells
for epoch in range(epochs):
    print ""
    print "Epoch: " + str(epoch)
    # Shuffle the order in which sequences are presented this epoch
    seqIndices = np.random.permutation(np.arange(numSequences))
    for s in range(numSequences):
        tm.reset()
        if s > 0 and s % 100 == 0:
            print str(s) + " sequences processed"
        for symbol in range(symbolsPerSequence):
            tm.compute(allSequences[seqIndices[s]][symbol], learn=True)
            # Record a spike for every active cell at this timestep
            for cell in tm.getActiveCells():
                spikeTrains[cell, ts] = 1
                numSpikes += 1
                spikeCount[ts] += 1
            # Obtain active columns:
            activeColumnsIndices = [tm.columnForCell(i) for i in tm.getActiveCells()]
            currentColumns = [1 if i in activeColumnsIndices else 0 for i in range(tm.numberOfColumns())]
            for col in np.nonzero(currentColumns)[0]:
                columnUsage[col] += 1
            # Every 10% of the simulation, analyze correlations and entropy
            if ts > 0 and ts % int(totalTS * 0.1) == 0:
                numSpikesX.append(ts)
                numSpikesY.append(numSpikes)
                numSpikes = 0
                # Analyze correlations among cells chosen at random
                subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), ts, 1000)
                (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
                negPCCX_cells.append(ts)
                negPCCY_cells.append(numNegPCC)
                bins = 300
                plt.hist(corrMatrix.ravel(), bins, alpha=0.5)
                plt.xlim(-0.05, 0.1)
                plt.xlabel("PCC")
                plt.ylabel("Frequency")
                plt.savefig("cellsHist_" + str(ts))
                plt.close()
                traceX.append(ts)
                #traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.5))
                #traceY.append(np.std(corrMatrix))
                #traceY.append(sum(1 for i in corrMatrix.ravel() if i > -0.05 and i < 0.1))
                traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.0))
                entropyX.append(ts)
                entropyY.append(computeEntropy(subSpikeTrains))
                # Analyze correlations within whole columns (large subsample)
                subSpikeTrains = subSampleWholeColumn(spikeTrains, colIndicesLarge, tm.getCellsPerColumn(), ts, 1000)
                (corrMatrix, numNegPCC) = computePWCorrelationsWithinCol(subSpikeTrains, True, tm.getCellsPerColumn())
                negPCCX_cols.append(ts)
                negPCCY_cols.append(numNegPCC)
                plt.hist(corrMatrix.ravel(), alpha=0.5)
                plt.xlabel("PCC")
                plt.ylabel("Frequency")
                plt.savefig("colsHist_" + str(ts))
                plt.close()
            ts += 1  # one timestep per symbol presentation
print "*** DONE ***"
In [8]:
plt.plot(traceX, traceY)
plt.xlabel("Time")
plt.ylabel("Positive PCC Count")
plt.savefig("positivePCCTrace")
plt.close()
In [ ]:
# Sliding-window sparsity: mean fraction of active cells over a 1000-timestep window
sparsityTraceX = []
sparsityTraceY = []
for i in range(totalTS - 1000):
    sparsityTraceX.append(i)
    sparsityTraceY.append(np.mean(spikeCount[i:1000 + i]) / tm.numberOfCells())
In [ ]:
plt.plot(sparsityTraceX, sparsityTraceY)
plt.xlabel("Time")
plt.ylabel("Sparsity")
plt.savefig("sparsityTrace")
plt.close()
In [ ]:
# plot trace of negative PCCs
plt.plot(negPCCX_cells, negPCCY_cells)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cells")
plt.close()
plt.plot(negPCCX_cols, negPCCY_cols)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cols")
plt.close()
In [ ]:
plt.plot(numSpikesX, numSpikesY)
plt.xlabel("Time")
plt.ylabel("Num Spikes")
plt.savefig("numSpikesTrace")
plt.close()
In [ ]:
# plot entropy
plt.plot(entropyX, entropyY)
plt.xlabel("Time")
plt.ylabel("Entropy")
plt.savefig("entropyTM")
plt.close()
In [ ]:
plt.hist(columnUsage)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage")
plt.close()
In [ ]:
subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), 0, 0)
In [ ]:
isi = computeISI(subSpikeTrains)
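computeISI is also a library helper, but the quantity is simple: the gaps (in timesteps) between consecutive spikes of the same cell, pooled over all cells. A minimal sketch under that assumption (interSpikeIntervals is an illustrative name):
In [ ]:
import numpy as np

def interSpikeIntervals(spikeTrains):
    # Pool the gaps between consecutive spike times of each cell.
    isi = []
    for cell in range(spikeTrains.shape[0]):
        spikeTimes = np.nonzero(spikeTrains[cell, :])[0]
        isi.extend(np.diff(spikeTimes))
    return np.array(isi)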
In [ ]:
# Print ISI distribution of TM
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 100
plt.hist(isi, bins)
plt.xlim(0,1000)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiTM")
plt.close()
In [ ]:
print np.mean(isi)
print np.std(isi)
print np.std(isi)/np.mean(isi)
In [ ]:
# Generate spike distribution
spikeCount = []
for cell in range(np.shape(subSpikeTrains)[0]):
    spikeCount.append(np.count_nonzero(subSpikeTrains[cell, :]))
In [ ]:
bins = 25
plt.hist(spikeCount, bins)
plt.xlabel("Spike Count")
plt.ylabel("Number of cells")
plt.savefig("spikesHistTM")
plt.close()
In [ ]:
#firingRate = 18
firingRate = np.mean(subSpikeTrains) * 1000
print "firing rate: " + str(firingRate)
pSpikeTrain = poissonSpikeGenerator(firingRate,np.shape(subSpikeTrains)[1],np.shape(subSpikeTrains)[0])
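poissonSpikeGenerator presumably draws independent spikes at the matched rate; a minimal homogeneous-Poisson sketch (the name poissonSpikes is illustrative, and the rate is taken per 1000 timesteps to match the * 1000 scaling above):
In [ ]:
import numpy as np

def poissonSpikes(firingRate, numTimesteps, numCells):
    # Bernoulli approximation of a homogeneous Poisson process: each
    # cell spikes independently at every timestep with probability
    # firingRate / 1000.
    p = firingRate / 1000.0
    return (np.random.rand(numCells, numTimesteps) < p).astype("uint32")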
In [ ]:
isi = computeISI(pSpikeTrain)
In [ ]:
# Print ISI distribution of Poisson model
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 100
plt.hist(isi, bins)
plt.xlim(0,600)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiPOI")
plt.close()
In [ ]:
print np.mean(isi)
print np.std(isi)
print np.std(isi)/np.mean(isi)
In [ ]:
# Generate spike distribution
spikeCount = []
for cell in range(np.shape(pSpikeTrain)[0]):
    spikeCount.append(np.count_nonzero(pSpikeTrain[cell, :]))
In [ ]:
bins = 25
plt.hist(spikeCount, bins)
plt.xlabel("Spike Count")
plt.ylabel("Number of cells")
plt.savefig("spikesHistPOI")
plt.close()
In [ ]:
subSpikeTrains = subSample(spikeTrains, 100, tm.numberOfCells(), -1, 1000)
In [ ]:
rasterPlot(subSpikeTrains, "TM")
In [ ]:
pSpikeTrain = poissonSpikeGenerator(firingRate,np.shape(subSpikeTrains)[1],np.shape(subSpikeTrains)[0])
In [ ]:
rasterPlot(pSpikeTrain, "Poisson")
In [ ]:
simpleAccuracyTest("random", tm, allSequences)
In [ ]:
# Sample from both TM_SpikeTrains and Poisson_SpikeTrains: 10 cells for 1000 (?) timesteps
import itertools  # needed for itertools.product below

wordLength = 10
firingRate = np.mean(subSpikeTrains) * 1000
# generate all 2^wordLength binary strings:
binaryStrings = list(itertools.product([0, 1], repeat=wordLength))
In [ ]:
trials = 10
x = []  # observed word counts (TM)
y = []  # word counts predicted by the random (Poisson) model
for t in range(trials):
    print "Trial: " + str(t)
    # sample from spike trains
    subSpikeTrains = subSample(spikeTrains, wordLength, tm.numberOfCells(), 0, 0)
    pSpikeTrain = poissonSpikeGenerator(firingRate, np.shape(subSpikeTrains)[1], np.shape(subSpikeTrains)[0])
    for i in range(2**wordLength):
        if i == 0:
            continue  # skip the all-zeros word
        binaryWord = np.array(binaryStrings[i], dtype="uint32")
        x.append(countInSample(binaryWord, subSpikeTrains))
        y.append(countInSample(binaryWord, pSpikeTrain))
print "*** DONE ***"
In [ ]:
plt.loglog(x, y, 'bo',basex=10)
plt.xlabel("Observed")
plt.ylabel("Predicted")
plt.plot(x,x,'k-')
plt.xlim(0,np.max(x))
plt.savefig("EladPlot")
plt.close()
In [ ]:
saveTM(tm)
In [ ]:
# To load the TM back from the file:
with open('tm.nta', 'rb') as f:
    proto2 = TemporalMemoryProto_capnp.TemporalMemoryProto.read(f, traversal_limit_in_words=2**61)
tm = TM.read(proto2)
In [ ]:
overlapMatrix = inputAnalysis(allSequences, "random", tm.numberOfColumns())
In [ ]:
# show heatmap of overlap matrix
plt.imshow(overlapMatrix, cmap='spectral', interpolation='nearest')
cb = plt.colorbar()
cb.set_label('Overlap Score')
plt.savefig("overlapScore_heatmap")
plt.close()
# plt.show()
# generate histogram
bins = 60
plt.hist(overlapMatrix.ravel(), bins, alpha=0.5)
plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist")
# zoomed-in version of the same histogram
plt.xlim(0.1, 1.0)
plt.ylim(0, 200000)
plt.savefig("overlapScore_hist_ZOOM")
plt.close()
In [ ]:
# Check whether any pair of distinct inputs overlaps completely
flag = False
for i in range(numSequences * symbolsPerSequence):
    for j in range(numSequences * symbolsPerSequence):
        if overlapMatrix[i, j] == 1:
            print i, j
            flag = True
            break
    if flag:
        break
In [ ]:
print overlapMatrix[1,11]
In [ ]:
print allSequences[0][1]
print allSequences[1][1]
print percentOverlap(allSequences[0][1], allSequences[1][1], tm.numberOfColumns())