In this experiment we generate data from a sinusoid with added Gaussian noise. The signal is passed through a scalar encoder and a spatial pooler, and then fed to the temporal memory (TM). We do a single pass through the TM and record the spike train of every cell, which we then use to estimate pairwise correlations among cells.
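Concretely, the pairwise statistic tracked throughout is the Pearson correlation coefficient (PCC): for two cells with spike trains $x_i$ and $x_j$, $\rho_{ij} = \mathrm{Cov}(x_i, x_j)\,/\,(\sigma_{x_i}\sigma_{x_j})$, presumably evaluated over the binary spike trains by the computePWCorrelations helper used below.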
In [1]:
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
from nupic.encoders import ScalarEncoder
from nupic.bindings.algorithms import TemporalMemory as TM
from nupic.bindings.algorithms import SpatialPooler as SP
from htmresearch.support.neural_correlations_utils import *
random.seed(1)
In [2]:
inputSize = 110  # width of the encoded input; must match the ScalarEncoder's n below
maxItems = 10000
tmEpochs = 1
totalTS = maxItems * tmEpochs
In [3]:
tm = TM(columnDimensions=(2048,),
        cellsPerColumn=8,  # originally this value is 32
        initialPermanence=0.21,
        connectedPermanence=0.3,
        minThreshold=15,
        maxNewSynapseCount=40,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        activationThreshold=15,
        predictedSegmentDecrement=0.01
        )
sparsity = 0.02
sparseCols = int(tm.numberOfColumns() * sparsity)
sp = SP(inputDimensions=(inputSize,),
        columnDimensions=(2048,),
        potentialRadius=int(0.5 * inputSize),
        numActiveColumnsPerInhArea=sparseCols,
        globalInhibition=True,
        synPermActiveInc=0.0001,
        synPermInactiveDec=0.0005,
        synPermConnected=0.5,
        boostStrength=0.0,
        spVerbosity=1
        )
In [4]:
# Generate a sinusoid with additive Gaussian noise
noiseLevel = 0.25
x = np.linspace(-100, 100, maxItems)
y = np.sin(x)
noise = np.random.normal(0, noiseLevel, maxItems)
noisY = y + noise
plt.plot(x, noisY)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.savefig("rawData")
plt.close()
In [5]:
numTrainingItems = 10000
trainSet = []
nonTrainSet = []
se = ScalarEncoder(n=110, w=21, minval=min(noisY), maxval=max(noisY), clipInput=True)
for i in range(maxItems):
    if i > 0 and i % 1000 == 0:
        print str(i) + " items processed"
    if i < numTrainingItems:
        trainSet.append(se.encode(noisY[i]))
    else:
        nonTrainSet.append(se.encode(noisY[i]))
print "*** All items encoded! ***"
In [6]:
allSequences = []
outputColumns = np.zeros(tm.numberOfColumns(), dtype="uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
# Set epochs for spatial-pooling:
spEpochs = 15 # 10
for epoch in range(spEpochs):
    print "Training epoch: " + str(epoch)
    # randomize records in training set
    randomIndex = np.random.permutation(np.arange(numTrainingItems))
    for i in range(numTrainingItems):
        sp.compute(trainSet[randomIndex[i]], True, outputColumns)
        # Populate array for Yuwei plot:
        for col in outputColumns.nonzero()[0]:
            columnUsage[col] += 1
        if epoch == (spEpochs - 1):
            allSequences.append(outputColumns.nonzero())
for i in range(maxItems - numTrainingItems):
    if i > 0 and i % 500 == 0:
        print str(i) + " items processed"
    sp.compute(nonTrainSet[i], False, outputColumns)
    allSequences.append(outputColumns.nonzero())
    # Populate array for Yuwei plot:
    for col in outputColumns.nonzero()[0]:
        columnUsage[col] += 1
print "*** All items processed! ***"
In [7]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_SP")
plt.close()
In [8]:
spikeTrains = np.zeros((tm.numberOfCells(), totalTS), dtype = "uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
spikeCount = np.zeros(totalTS, dtype="uint32")
ts = 0
entropyX = []
entropyY = []
negPCCX_cells = []
negPCCY_cells = []
numSpikesX = []
numSpikesY = []
numSpikes = 0
negPCCX_cols = []
negPCCY_cols = []
traceX = []
traceY = []
# Randomly pick the indices of the columns to keep track of during the simulation
# keep track of 125 columns = 1000 cells
#colIndicesLarge = np.random.permutation(tm.numberOfColumns())[0:125]
for e in range(tmEpochs):
    print ""
    print "Epoch: " + str(e)
    for s in range(maxItems):
        if s % 1000 == 0:
            print str(s) + " items processed"
        tm.compute(allSequences[s][0].tolist(), learn=True)
        for cell in tm.getActiveCells():
            spikeTrains[cell, ts] = 1
            numSpikes += 1
            spikeCount[ts] += 1
        # Obtain active columns:
        activeColumnsIndices = [tm.columnForCell(i) for i in tm.getActiveCells()]
        currentColumns = [1 if i in activeColumnsIndices else 0 for i in range(tm.numberOfColumns())]
        for col in np.nonzero(currentColumns)[0]:
            columnUsage[col] += 1
        if ts > 0 and ts % 1000 == 0:
            numSpikesX.append(ts)
            numSpikesY.append(numSpikes)
            numSpikes = 0
            subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), ts, 1000)
            (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
            negPCCX_cells.append(ts)
            negPCCY_cells.append(numNegPCC)
            bins = 300
            plt.hist(corrMatrix.ravel(), bins, alpha=0.5)
            plt.xlim(-0.1, 0.2)
            plt.xlabel("PCC")
            plt.ylabel("Frequency")
            plt.savefig("cellsHist" + str(ts))
            plt.close()
            traceX.append(ts)
            #traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.5))
            #traceY.append(np.std(corrMatrix))
            #traceY.append(sum(1 for i in corrMatrix.ravel() if i > -0.05 and i < 0.1))
            traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.0))
            entropyX.append(ts)
            entropyY.append(computeEntropy(subSpikeTrains))
            #print "++ Analyzing correlations (whole columns) ++"
            ### First the LARGE subsample of columns:
            colIndicesLarge = np.random.permutation(tm.numberOfColumns())[0:125]
            subSpikeTrains = subSampleWholeColumn(spikeTrains, colIndicesLarge, tm.getCellsPerColumn(), ts, 1000)
            (corrMatrix, numNegPCC) = computePWCorrelationsWithinCol(subSpikeTrains, True, tm.getCellsPerColumn())
            negPCCX_cols.append(ts)
            negPCCY_cols.append(numNegPCC)
            #print "++ Generating histogram ++"
            plt.hist(corrMatrix.ravel(), alpha=0.5)
            plt.xlabel("PCC")
            plt.ylabel("Frequency")
            plt.savefig("colsHist_" + str(ts))
            plt.close()
        ts += 1
print ""
print "*** DONE ***"
# end for-epochs
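The helpers computePWCorrelations and computePWCorrelationsWithinCol come from htmresearch.support.neural_correlations_utils and are not shown here. As a rough sketch only (assuming the helper returns the matrix of Pearson correlation coefficients between the subsampled cell spike trains, with autocorrelations excluded, together with a count of negative entries), plain NumPy would give something like:
In [ ]:
def pairwiseCorrelationsSketch(spikes):
    # spikes: (numCells, numTimesteps) binary array, e.g. a subSample() output
    corrMatrix = np.corrcoef(spikes)       # Pearson correlation for every cell pair
    np.fill_diagonal(corrMatrix, np.nan)   # discard autocorrelations
    valid = corrMatrix[~np.isnan(corrMatrix)]  # cells silent in the window yield NaN rows
    numNegPCC = np.count_nonzero(valid < 0)    # note: each pair is counted twice in this sketch
    return corrMatrix, numNegPCC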
In [9]:
plt.plot(traceX, traceY)
plt.xlabel("Time")
plt.ylabel("Positive PCC Count")
plt.savefig("positivePCCTrace")
plt.close()
In [ ]:
sparsityTraceX = []
sparsityTraceY = []
for i in range(totalTS - 1000):
    sparsityTraceX.append(i)
    sparsityTraceY.append(np.mean(spikeCount[i:1000 + i]) / tm.numberOfCells())
plt.plot(sparsityTraceX, sparsityTraceY)
plt.xlabel("Time")
plt.ylabel("Sparsity")
plt.savefig("sparsityTrace")
plt.close()
In [ ]:
# plot trace of negative PCCs
plt.plot(negPCCX_cells, negPCCY_cells)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cells")
plt.close()
plt.plot(negPCCX_cols, negPCCY_cols)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cols")
plt.close()
In [ ]:
plt.plot(numSpikesX, numSpikesY)
plt.xlabel("Time")
plt.ylabel("Num Spikes")
plt.savefig("numSpikesTrace")
plt.close()
In [ ]:
# print computeEntropy()
plt.plot(entropyX, entropyY)
plt.xlabel("Time")
plt.ylabel("Entropy")
plt.savefig("entropyTM")
plt.close()
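computeEntropy is also imported from neural_correlations_utils; its exact definition is not shown in this notebook. A hedged guess at the kind of quantity it reports (the mean binary entropy of the per-cell firing rates over the sampled window) would look like:
In [ ]:
def entropySketch(spikes):
    # spikes: (numCells, numTimesteps) binary array
    p = spikes.mean(axis=1)      # per-cell firing probability
    p = p[(p > 0) & (p < 1)]     # skip silent and always-active cells
    return np.mean(-p * np.log2(p) - (1 - p) * np.log2(1 - p))  # bits per cell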
In [ ]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_TM")
plt.close()
In [ ]:
simpleAccuracyTest("periodic", tm, allSequences)
In [ ]:
subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), 0, 0)
In [ ]:
isi = computeISI(subSpikeTrains)
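computeISI comes from the same utils module. Assuming an inter-spike interval (ISI) is the gap, in timesteps, between consecutive spikes of a single cell, pooled over all cells, a minimal stand-in would be:
In [ ]:
def isiSketch(spikes):
    # spikes: (numCells, numTimesteps) binary array
    intervals = []
    for cell in range(spikes.shape[0]):
        spikeTimes = np.nonzero(spikes[cell, :])[0]
        if spikeTimes.size > 1:
            intervals.extend(np.diff(spikeTimes))  # gaps between consecutive spikes
    return np.array(intervals)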
In [ ]:
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 200
plt.hist(isi, bins)
plt.xlim(0,800)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiTM")
plt.close()
In [ ]:
print np.mean(isi)
print np.std(isi)
print np.std(isi)/np.mean(isi)
In [ ]:
# Generate spike distribution
spikeCount = []
for cell in range(np.shape(subSpikeTrains)[0]):
    spikeCount.append(np.count_nonzero(subSpikeTrains[cell, :]))
In [ ]:
bins = 25
plt.hist(spikeCount, bins)
plt.xlim(0, 1100)
plt.xlabel("Spike Count")
plt.ylabel("Number of cells")
plt.savefig("spikesHist")
plt.close()
In [ ]:
subSpikeTrains = subSample(spikeTrains, 100, tm.numberOfCells(), -1, 1000)
In [ ]:
rasterPlot(subSpikeTrains, "TM")
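rasterPlot is provided by neural_correlations_utils as well; a bare-matplotlib stand-in that draws a spike raster from the subsampled trains might look like this (the output file name is just illustrative):
In [ ]:
cellIds, spikeTimes = np.nonzero(subSpikeTrains)   # one (cell, time) pair per spike
plt.scatter(spikeTimes, cellIds, s=1, marker='|')
plt.xlabel("Time")
plt.ylabel("Cell")
plt.savefig("rasterTM_sketch")
plt.close()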
In [ ]:
saveTM(tm)
In [ ]:
# to load the TM back from the file do:
# TemporalMemoryProto_capnp is assumed to be available in the namespace
# (e.g. via the wildcard import from neural_correlations_utils above).
with open('tm.nta', 'rb') as f:
    proto2 = TemporalMemoryProto_capnp.TemporalMemoryProto.read(f, traversal_limit_in_words=2**61)
tm = TM.read(proto2)
In [ ]:
overlapMatrix = inputAnalysis(allSequences, "periodic", tm.numberOfColumns())
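inputAnalysis builds the matrix of pairwise overlap scores between the SP outputs stored in allSequences; the exact normalization lives in the utils module. Assuming the score for two SDRs is the number of shared active columns divided by the larger of the two activity counts, one entry would be computed roughly as:
In [ ]:
def overlapSketch(sdrA, sdrB):
    # sdrA, sdrB: arrays of active column indices, e.g. allSequences[i][0]
    a, b = set(sdrA), set(sdrB)
    return len(a & b) / float(max(len(a), len(b)))  # 1.0 means identical SDRs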
In [ ]:
# show heatmap of overlap matrix
plt.imshow(overlapMatrix, cmap='spectral', interpolation='nearest')
cb = plt.colorbar()
cb.set_label('Overlap Score')
plt.savefig("overlapScore_heatmap")
plt.close()
# plt.show()
# generate histogram
bins = 60
(n, bins, patches) = plt.hist(overlapMatrix.ravel(), bins, alpha=0.5)
plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist")
# zoomed-in version of the same histogram
plt.xlim(0.1, 1)
plt.ylim(0, 1000000)
plt.savefig("overlapScore_hist_ZOOM")
plt.close()