EXP 4 - Noisy Sinusoid

In this experiment we generate data from a sinusoid with added Gaussian noise. The data is passed through a scalar encoder and a spatial pooler, and the resulting SDRs are fed to the temporal memory (TM). We do a single pass through the TM, recording the spike train of every cell, and use these spike trains to estimate pairwise correlations among cells.
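
Pairwise correlations are computed with helpers from htmresearch.support.neural_correlations_utils (computePWCorrelations below). Purely as an illustration of the idea (not the library's implementation), Pearson correlation coefficients (PCCs) between binary spike trains can be obtained with numpy:

In [ ]:
# Illustration only: pairwise PCCs among a few toy binary spike trains.
# The experiment itself uses computePWCorrelations from neural_correlations_utils.
import numpy as np

toySpikes = (np.random.rand(5, 50) < 0.2).astype("uint32")  # 5 cells, 50 time steps
pccMatrix = np.corrcoef(toySpikes)                          # 5 x 5 matrix of pairwise PCCs
np.fill_diagonal(pccMatrix, 0)                              # ignore autocorrelations
print pccMatrix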


In [1]:
import numpy as np
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd

from nupic.encoders import ScalarEncoder
from nupic.bindings.algorithms import TemporalMemory as TM
from nupic.bindings.algorithms import SpatialPooler as SP
from htmresearch.support.neural_correlations_utils import *

random.seed(1)

In [2]:
inputSize = 110 # width of the scalar encoder output (n below); must match the SP input dimensions
maxItems = 10000
tmEpochs = 1
totalTS = maxItems * tmEpochs

In [3]:
tm = TM(columnDimensions = (2048,),
        cellsPerColumn=8, # originally this value was 32
        initialPermanence=0.21,
        connectedPermanence=0.3,
        minThreshold=15,
        maxNewSynapseCount=40,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        activationThreshold=15,
        predictedSegmentDecrement=0.01
       )

sparsity = 0.02
sparseCols = int(tm.numberOfColumns() * sparsity)

sp = SP(inputDimensions=(inputSize,),
        columnDimensions=(2048,),
        potentialRadius = int(0.5*inputSize),
        numActiveColumnsPerInhArea = sparseCols,
        globalInhibition = True,
        synPermActiveInc = 0.0001,
        synPermInactiveDec = 0.0005,
        synPermConnected = 0.5,
        boostStrength = 0.0,
        spVerbosity = 1
       )

Part I. Encoder


In [4]:
# Generate a noisy sinusoid
noiseLevel = 0.25
x = np.linspace(-100, 100, maxItems)
y = np.sin(x)
noise = np.random.normal(0, noiseLevel, maxItems)

noisY = y + noise
plt.plot(x, noisY)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.savefig("rawData")
plt.close()

In [5]:
numTrainingItems = 10000
trainSet = []
nonTrainSet = []

se = ScalarEncoder(n=inputSize, w=21, minval=min(noisY), maxval=max(noisY), clipInput=True)

for i in range(maxItems):
    if i > 0 and i % 1000 == 0:
        print str(i) + " items processed"
    if i < numTrainingItems:
        trainSet.append(se.encode(noisY[i]))
    else:
        nonTrainSet.append(se.encode(noisY[i]))
print "*** All items encoded! ***"


1000 items processed
2000 items processed
3000 items processed
4000 items processed
5000 items processed
6000 items processed
7000 items processed
8000 items processed
9000 items processed
*** All items encoded! ***
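
Each scalar is encoded as an n-bit SDR with w active bits. A quick sanity check on the first encoded item (assumes the se encoder and trainSet built above):

In [ ]:
# Sanity check on the scalar encoding (assumes se and trainSet from above)
print "encoding width: " + str(len(trainSet[0]))       # should equal n
print "active bits: " + str(int(np.sum(trainSet[0])))  # should equal w (21)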

Part II. Spatial Pooler


In [6]:
allSequences = []
outputColumns = np.zeros(tm.numberOfColumns(), dtype="uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")

# Set epochs for spatial-pooling:
spEpochs = 15 # 10

for epoch in range(spEpochs):
    print "Training epoch: " + str(epoch)
    
    #randomize records in training set
    randomIndex = np.random.permutation(np.arange(numTrainingItems))
    
    for i in range(numTrainingItems):
        sp.compute(trainSet[randomIndex[i]], True, outputColumns)
        # Populate array for Yuwei plot:
        columnUsage[outputColumns.nonzero()[0]] += 1
        if epoch == (spEpochs - 1):
            allSequences.append(outputColumns.nonzero())

for i in range(maxItems - numTrainingItems):
    if i > 0 and i % 500 == 0:
        print str(i) + " items processed"    
    sp.compute(nonTrainSet[i], False, outputColumns)
    allSequences.append(outputColumns.nonzero())
    # Populate array for Yuwei plot:
    columnUsage[outputColumns.nonzero()[0]] += 1

print "*** All items processed! ***"


Training epoch: 0
Training epoch: 1
Training epoch: 2
Training epoch: 3
Training epoch: 4
Training epoch: 5
Training epoch: 6
Training epoch: 7
Training epoch: 8
Training epoch: 9
Training epoch: 10
Training epoch: 11
Training epoch: 12
Training epoch: 13
Training epoch: 14
*** All items processed! ***
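
With global inhibition and numActiveColumnsPerInhArea set to sparseCols, each SP output should contain roughly sparseCols (about 2% of the 2048 columns) active columns. A quick check over the stored outputs (assumes allSequences as built above):

In [ ]:
# Sanity check on SP output sparsity (assumes allSequences from above)
activeCounts = [len(seq[0]) for seq in allSequences]
print "mean active columns per input: " + str(np.mean(activeCounts))
print "expected: " + str(sparseCols)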

In [7]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_SP")
plt.close()

Part III. Temporal Memory


In [8]:
spikeTrains = np.zeros((tm.numberOfCells(), totalTS), dtype = "uint32")
columnUsage = np.zeros(tm.numberOfColumns(), dtype="uint32")
spikeCount = np.zeros(totalTS, dtype="uint32")
ts = 0

entropyX = []
entropyY = []

negPCCX_cells = []
negPCCY_cells = []

numSpikesX = []
numSpikesY = []

numSpikes = 0

negPCCX_cols = []
negPCCY_cols = []

traceX = []
traceY = []

# Randomly generate the indices of the columns to keep track of during the simulation.
# Track 125 columns = 1000 cells (8 cells per column).
#colIndicesLarge = np.random.permutation(tm.numberOfColumns())[0:125]

for e in range(tmEpochs):
    print ""
    print "Epoch: " + str(e)
    for s in range(maxItems):
        if s % 1000 == 0:
            print str(s) + " items processed"
        
        tm.compute(allSequences[s][0].tolist(), learn=True)
        for cell in tm.getActiveCells():
            spikeTrains[cell, ts] = 1
            numSpikes += 1
            spikeCount[ts] += 1
            
        # Obtain active columns and update column usage:
        activeColumnsIndices = set(tm.columnForCell(i) for i in tm.getActiveCells())
        for col in activeColumnsIndices:
            columnUsage[col] += 1
    
        if ts > 0 and ts % 1000 == 0:
            numSpikesX.append(ts)
            numSpikesY.append(numSpikes)
            
            numSpikes = 0
            subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), ts, 1000)
            (corrMatrix, numNegPCC) = computePWCorrelations(subSpikeTrains, removeAutoCorr=True)
            negPCCX_cells.append(ts)
            negPCCY_cells.append(numNegPCC)                
            bins = 300
            plt.hist(corrMatrix.ravel(), bins, alpha=0.5)                
            plt.xlim(-0.1,0.2)
            plt.xlabel("PCC")
            plt.ylabel("Frequency")
            plt.savefig("cellsHist" + str(ts))
            plt.close()

            traceX.append(ts)
            #traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.5))
            #traceY.append(np.std(corrMatrix))
            #traceY.append(sum(1 for i in corrMatrix.ravel() if i > -0.05 and i < 0.1))            
            traceY.append(sum(1 for i in corrMatrix.ravel() if i > 0.0))
            
            entropyX.append(ts)
            entropyY.append(computeEntropy(subSpikeTrains))

            #print "++ Analyzing correlations (whole columns) ++"
            ### First the LARGE subsample of columns:
            colIndicesLarge = np.random.permutation(tm.numberOfColumns())[0:125] 
            
            subSpikeTrains = subSampleWholeColumn(spikeTrains, colIndicesLarge, tm.getCellsPerColumn(), ts, 1000)
            (corrMatrix, numNegPCC) = computePWCorrelationsWithinCol(subSpikeTrains, True, tm.getCellsPerColumn())
            negPCCX_cols.append(ts)
            negPCCY_cols.append(numNegPCC)                
            #print "++ Generating histogram ++"
            plt.hist(corrMatrix.ravel(), alpha=0.5)
            plt.xlabel("PCC")
            plt.ylabel("Frequency")
            plt.savefig("colsHist_" + str(ts))
            plt.close()                 
            
        ts += 1
    print ""
    
print "*** DONE ***"
# end for-epochs


Epoch: 0
0 items processed
1000 items processed
2000 items processed
3000 items processed
4000 items processed
5000 items processed
6000 items processed
7000 items processed
8000 items processed
9000 items processed

*** DONE ***
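
spikeTrains is a binary matrix of shape (number of cells, total time steps), i.e. 2048 columns x 8 cells = 16384 cells by 10000 steps. A quick look at the overall activity recorded during the pass:

In [ ]:
# Overall activity in the recorded spike trains
print "spikeTrains shape: " + str(spikeTrains.shape)
print "mean active cells per time step: " + str(spikeCount.mean())
print "mean fraction of active cells: " + str(spikeCount.mean() / float(tm.numberOfCells()))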

In [9]:
plt.plot(traceX, traceY)
plt.xlabel("Time")
plt.ylabel("Positive PCC Count")
plt.savefig("positivePCCTrace")
plt.close()

In [ ]:
sparsityTraceX = []
sparsityTraceY = []
for i in range(totalTS - 1000):
    sparsityTraceX.append(i)
    sparsityTraceY.append(np.mean(spikeCount[i:1000 + i]) / tm.numberOfCells())

plt.plot(sparsityTraceX, sparsityTraceY)
plt.xlabel("Time")
plt.ylabel("Sparsity")
plt.savefig("sparsityTrace")
plt.close()

In [ ]:
# plot trace of negative PCCs
plt.plot(negPCCX_cells, negPCCY_cells)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cells")
plt.close()

plt.plot(negPCCX_cols, negPCCY_cols)
plt.xlabel("Time")
plt.ylabel("Negative PCC Count")
plt.savefig("negPCCTrace_cols")
plt.close()

In [ ]:
plt.plot(numSpikesX, numSpikesY)
plt.xlabel("Time")
plt.ylabel("Num Spikes")
plt.savefig("numSpikesTrace")
plt.close()

In [ ]:
# print computeEntropy()
plt.plot(entropyX, entropyY)
plt.xlabel("Time")
plt.ylabel("Entropy")
plt.savefig("entropyTM")
plt.close()
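
computeEntropy is provided by neural_correlations_utils. Purely as an illustration of the kind of metric involved (the library's definition may differ), one common choice is the mean binary entropy of each cell's firing probability over the analysis window:

In [ ]:
# Illustration only: mean binary entropy of per-cell firing probabilities.
# This is an assumption about the metric, not necessarily what computeEntropy does.
def binaryEntropySketch(spikes):
    p = spikes.mean(axis=1)          # firing probability of each cell
    p = p[(p > 0) & (p < 1)]         # drop cells that never or always fire
    return np.mean(-p * np.log2(p) - (1 - p) * np.log2(1 - p))

toySpikes = (np.random.rand(100, 1000) < 0.02).astype("uint32")
print binaryEntropySketch(toySpikes)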

In [ ]:
bins = 50
plt.hist(columnUsage, bins)
plt.xlabel("Number of times active")
plt.ylabel("Number of columns")
plt.savefig("columnUsage_TM")
plt.close()

Part IV. Analysis of Spike Trains


In [ ]:
simpleAccuracyTest("periodic", tm, allSequences)

In [ ]:
subSpikeTrains = subSample(spikeTrains, 1000, tm.numberOfCells(), 0, 0)

In [ ]:
isi = computeISI(subSpikeTrains)
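
computeISI is also from neural_correlations_utils. As a rough sketch of the idea (not the library's code), the inter-spike intervals of a single binary spike train are the differences between consecutive spike times:

In [ ]:
# Illustration only: ISIs of a single binary spike train
def cellISISketch(spikeTrain):
    spikeTimes = np.nonzero(spikeTrain)[0]
    return np.diff(spikeTimes)

print cellISISketch(np.array([0, 1, 0, 0, 1, 1, 0, 1]))  # -> [3 1 2]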

In [ ]:
#bins = np.linspace(np.min(isi), np.max(isi), 50)
bins = 200
plt.hist(isi, bins)
plt.xlim(0,800)
# plt.xlim(89500,92000)
plt.xlabel("ISI")
plt.ylabel("Frequency")
plt.savefig("isiTM")
plt.close()

In [ ]:
# ISI statistics: mean, standard deviation, and coefficient of variation (CV)
print np.mean(isi)
print np.std(isi)
print np.std(isi) / np.mean(isi)

In [ ]:
# Per-cell spike counts in the subsample
spikeCount = []
for cell in range(np.shape(subSpikeTrains)[0]):
    spikeCount.append(np.count_nonzero(subSpikeTrains[cell,:]))

In [ ]:
bins = 25
plt.hist(spikeCount, bins)
plt.xlim(0, 1100)
plt.xlabel("Spike Count")
plt.ylabel("Number of cells")
plt.savefig("spikesHist")
plt.close()

Raster plots


In [ ]:
subSpikeTrains = subSample(spikeTrains, 100, tm.numberOfCells(), -1, 1000)

In [ ]:
rasterPlot(subSpikeTrains, "TM")
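
rasterPlot is provided by neural_correlations_utils. As a minimal sketch of what a raster over a cells-by-time binary matrix looks like (not necessarily the helper's exact output), it can also be drawn directly with matplotlib:

In [ ]:
# Illustration only: simple raster plot of a cells x time binary matrix
def rasterSketch(spikes, fileName):
    cells, times = np.nonzero(spikes)
    plt.plot(times, cells, linestyle="", marker="|", markersize=3, color="k")
    plt.xlabel("Time step")
    plt.ylabel("Cell")
    plt.savefig(fileName)
    plt.close()

rasterSketch(subSpikeTrains, "rasterSketch_TM")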

Part V. Save TM


In [ ]:
saveTM(tm)

In [ ]:
# to load the TM back from the file do:
with open('tm.nta', 'rb') as f:
    proto2 = TemporalMemoryProto_capnp.TemporalMemoryProto.read(f, traversal_limit_in_words=2**61)
tm = TM.read(proto2)

Part VI. Analysis of Input


In [ ]:
overlapMatrix = inputAnalysis(allSequences, "periodic", tm.numberOfColumns())
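
inputAnalysis (from neural_correlations_utils) produces a pairwise overlap matrix over the SP outputs. Purely as an illustration of one common overlap measure (not necessarily the one the helper uses), the overlap between two SDRs given as index arrays can be taken as the size of their intersection, normalized by the larger number of active bits:

In [ ]:
# Illustration only: normalized overlap between two SDRs given as index arrays
def overlapScoreSketch(sdr1, sdr2):
    shared = len(set(sdr1) & set(sdr2))
    return shared / float(max(len(sdr1), len(sdr2)))

print overlapScoreSketch(allSequences[0][0], allSequences[1][0])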

In [ ]:
# show heatmap of overlap matrix
plt.imshow(overlapMatrix, cmap='spectral', interpolation='nearest')
cb = plt.colorbar()
cb.set_label('Overlap Score')
plt.savefig("overlapScore_heatmap")
plt.close()
# plt.show()

# generate histogram
bins = 60
(n, bins, patches) = plt.hist(overlapMatrix.ravel(), bins, alpha=0.5)

plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist")

plt.xlim(0.1,1)
plt.ylim(0,1000000)
plt.xlabel("Overlap Score")
plt.ylabel("Frequency")
plt.savefig("overlapScore_hist_ZOOM")
plt.close()
