In [1]:
import numpy
In [2]:
from nupic.encoders import ScalarEncoder
ScalarEncoder?
In [3]:
# 22 bits with 3 active representing values 0 to 100
# clipInput=True makes values >100 encode the same as 100 (instead of throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)
In [4]:
# Values above maxval are clipped to the top of the range
print "100 =", enc.encode(100)
# Any still-larger number gets the same encoding
print "1000 =", enc.encode(1000)
In [5]:
from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder
RandomDistributedScalarEncoder?
In [6]:
# 21 bits with 3 active, with buckets of size 5
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)
print "3 = ", rdse.encode(3)
print "4 = ", rdse.encode(4)
print "5 = ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "1000 =", rdse.encode(1000)
In [7]:
import datetime
from nupic.encoders.date import DateEncoder
DateEncoder?
In [8]:
de = DateEncoder(season=5)
now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now = ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas = ", de.encode(xmas)
In [9]:
from nupic.encoders.category import CategoryEncoder
categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")
monkey = encoder.encode("monkey")
loris = encoder.encode("slow loris")
print "cat = ", cat
print "dog = ", dog
print "monkey = ", monkey
print "slow loris =", loris
In [10]:
print encoder.encode(None)
In [11]:
print encoder.encode("unknown")
In [12]:
print encoder.decode(cat)
In [13]:
catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])
print encoder.decode(catdog)
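Rather than building the union by hand, you can OR two encodings together with numpy (a sketch, assuming the encodings are numpy arrays of 0s and 1s); decode() should then report both categories:
In [ ]:
# Element-wise OR (max) of two SDRs yields their union.
catdogUnion = numpy.maximum(cat, dog)
print catdogUnion
print encoder.decode(catdogUnion)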
In [14]:
from nupic.research.spatial_pooler import SpatialPooler
SpatialPooler?
In [20]:
print len(cat)
print cat
In [21]:
sp = SpatialPooler(inputDimensions=(15,),
                   columnDimensions=(4,),
                   potentialRadius=15,
                   numActiveColumnsPerInhArea=1,
                   globalInhibition=True,
                   synPermActiveInc=0.03,
                   potentialPct=1.0)
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected
In [22]:
output = numpy.zeros((4,), dtype="int")
sp.compute(cat, learn=True, activeArray=output)
print output
In [23]:
for _ in xrange(20):
    sp.compute(cat, learn=True, activeArray=output)
In [24]:
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected
In [25]:
for _ in xrange(200):
    sp.compute(cat, learn=True, activeArray=output)
    sp.compute(dog, learn=True, activeArray=output)
    sp.compute(monkey, learn=True, activeArray=output)
    sp.compute(loris, learn=True, activeArray=output)
In [26]:
for column in xrange(4):
    connected = numpy.zeros((15,), dtype="int")
    sp.getConnectedSynapses(column, connected)
    print connected
In [27]:
noisyCat = numpy.zeros((15,), dtype="uint32")
noisyCat[3] = 1
noisyCat[4] = 1
# This is part of dog!
noisyCat[6] = 1
print noisyCat
In [28]:
sp.compute(noisyCat, learn=False, activeArray=output)
print output # matches cat!
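To see how much noise the SP tolerated, compare the noisy input against the clean encoding (same overlap sketch as before):
In [ ]:
# noisyCat shares only part of cat's encoding, yet the SP output matches.
print "overlap(noisyCat, cat) =", numpy.dot(noisyCat, cat)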
In [29]:
from nupic.research.TP import TP
TP?
In [30]:
# Step 1: create Temporal Pooler instance with appropriate parameters
tp = TP(numberOfCols=50, cellsPerColumn=2,
        initialPerm=0.5, connectedPerm=0.5,
        minThreshold=10, newSynapseCount=10,
        permanenceInc=0.1, permanenceDec=0.0,
        activationThreshold=8,
        globalDecay=0, burnIn=1,
        checkSynapseConsistency=False,
        pamLength=10)
In [31]:
# Step 2: create input vectors to feed to the temporal pooler. Each input vector
# must be numberOfCols wide. Here we create a simple sequence of 5 vectors
# representing the sequence A -> B -> C -> D -> E
x = numpy.zeros((5, tp.numberOfCols), dtype="uint32")
x[0,0:10] = 1 # Input SDR representing "A", corresponding to columns 0-9
x[1,10:20] = 1 # Input SDR representing "B", corresponding to columns 10-19
x[2,20:30] = 1 # Input SDR representing "C", corresponding to columns 20-29
x[3,30:40] = 1 # Input SDR representing "D", corresponding to columns 30-39
x[4,40:50] = 1 # Input SDR representing "E", corresponding to columns 40-49
In [32]:
# Step 3: send this simple sequence to the temporal pooler for learning.
# We repeat the sequence 10 times.
for i in range(10):
    # Send each letter in the sequence in order
    for j in range(5):
        # The compute method performs one step of learning and/or inference. Note:
        # here we just perform learning, but you can perform prediction/inference
        # and learning in the same step if you want (online learning).
        tp.compute(x[j], enableLearn=True, computeInfOutput=False)
        # This function prints the segments associated with every cell.
        # If you really want to understand the TP, uncomment this line. By
        # following every step you can get an excellent understanding of exactly
        # how the TP learns.
        #tp.printCells()
    # The reset command tells the TP that a sequence just ended and essentially
    # zeros out all the states. It is not strictly necessary, but the bookkeeping
    # is messier without resets, and the TP learns more quickly with them.
    tp.reset()
In [33]:
# Step 4: send the same sequence of vectors and look at predictions made by
# the temporal pooler.

# Utility routine for printing the input vector
def formatRow(x):
    s = ''
    for c in range(len(x)):
        if c > 0 and c % 10 == 0:
            s += ' '
        s += str(x[c])
    s += ' '
    return s

for j in range(5):
    print "\n\n--------", "ABCDE"[j], "-----------"
    print "Raw input vector\n", formatRow(x[j])

    # Send each vector to the TP, with learning turned off
    tp.compute(x[j], enableLearn=False, computeInfOutput=True)

    # This method prints out the active state of each cell followed by the
    # predicted state of each cell. For convenience the cells are grouped
    # 10 at a time. When there are multiple cells per column the printout
    # is arranged so the cells in a column are stacked together.
    #
    # What you should notice is that the columns where active state is 1
    # represent the SDR for the current input pattern, and the columns where
    # predicted state is 1 represent the SDR for the next expected pattern.
    print "\nAll the active and predicted cells:"
    tp.printStates(printPrevious=False, printLearnState=False)

    # tp.getPredictedState() gets the predicted cells.
    # predictedCells[c][i] represents the state of the i'th cell in the c'th
    # column. To see if a column is predicted, we can simply take the OR
    # across all the cells in that column. In numpy we can do this by taking
    # the max along axis 1.
    print "\n\nThe following columns are predicted by the temporal pooler. This"
    print "should correspond to columns in the *next* item in the sequence."
    predictedCells = tp.getPredictedState()
    print formatRow(predictedCells.max(axis=1).nonzero())
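Since Step 2 gave each letter its own block of ten columns, you can translate predicted columns back into letters. A sketch relying on that layout, shown here for the final input in the sequence:
In [ ]:
# Map predicted column indices back to letters via the
# 10-columns-per-letter layout from Step 2.
predictedColumns = tp.getPredictedState().max(axis=1).nonzero()[0]
print "predicted letters:", sorted(set("ABCDE"[c // 10] for c in predictedColumns))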
In [ ]:
# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "CLA",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {'days': 0,
                        'fields': [('consumption', 'sum')],
                        'hours': 1,
                        'microseconds': 0,
                        'milliseconds': 0,
                        'minutes': 0,
                        'months': 0,
                        'seconds': 0,
                        'weeks': 0,
                        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalMultiStep',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step; 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity': 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'
                },
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21
                },
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any combination of the following:
            # days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1, hours=12)
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset': None,
        },

        'spEnable': True,
        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity': 0,
            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py', 'oldpy' (default), 'cpp' (speed optimized, new)
            'spatialImp': 'cpp',
            'globalInhibition': 1,
            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,
            'inputWidth': 0,
            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActivePerInhArea': 40,
            'seed': 1956,
            # coincInputPoolPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose coincInputPoolPct * (2*coincInputRadius+1)^2
            'coincInputPoolPct': 0.5,
            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TP and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,
            'synPermActiveInc': 0.1,
            'synPermInactiveDec': 0.005,
        },

        # Controls whether TP is enabled or disabled;
        # TP is necessary for making temporal predictions, such as predicting
        # the next inputs. Without TP, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tpEnable': True,
        'tpParams': {
            # TP diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and TP10X*.py)
            'verbosity': 0,
            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,
            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,
            'inputWidth': 2048,
            'seed': 1960,
            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',
            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,
            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,
            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,
            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,
            # Permanence Increment
            'permanenceInc': 0.1,
            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec': 0.1,
            'globalDecay': 0.0,
            'maxAge': 0,
            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,
            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,
            'outputType': 'normal',
            # "Pay Attention Mode" length. This tells the TP how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName': 'CLAClassifierRegion',
            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'clVerbosity': 0,
            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,
            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1,5',
            'implementation': 'cpp',
        },

        'trainSPNetOnlyIfRequested': False,
    },
}
In [ ]:
from nupic.data.datasethelpers import findDataset
datasetPath = findDataset("extra/hotgym/hotgym.csv")
print datasetPath
with open(datasetPath) as inputFile:
    print
    for _ in xrange(8):
        print inputFile.next().strip()
In [ ]:
from nupic.data.file_record_stream import FileRecordStream

def getData():
    return FileRecordStream(datasetPath)

data = getData()
for _ in xrange(5):
    print data.next()
In [ ]:
from nupic.frameworks.opf.modelfactory import ModelFactory
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})
In [ ]:
data = getData()
for _ in xrange(100):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input:      ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]
In [ ]:
print "5-step prediction: ", result.inferences["multiStepBestPredictions"][5]
In [ ]:
# Model Params!
MODEL_PARAMS = {
    # Type of model that the rest of these parameters apply to.
    'model': "CLA",

    # Version that specifies the format of the config.
    'version': 1,

    # Intermediate variables used to compute fields in modelParams and also
    # referenced from the control section.
    'aggregationInfo': {'days': 0,
                        'fields': [('consumption', 'sum')],
                        'hours': 1,
                        'microseconds': 0,
                        'milliseconds': 0,
                        'minutes': 0,
                        'months': 0,
                        'seconds': 0,
                        'weeks': 0,
                        'years': 0},

    'predictAheadTime': None,

    # Model parameter dictionary.
    'modelParams': {
        # The type of inference that this model will perform
        'inferenceType': 'TemporalAnomaly',

        'sensorParams': {
            # Sensor diagnostic output verbosity control;
            # if > 0: sensor region will print out on screen what it's sensing
            # at each step; 0: silent; >=1: some info; >=2: more info;
            # >=3: even more info (see compute() in py/regions/RecordSensor.py)
            'verbosity': 0,

            # Include the encoders we use
            'encoders': {
                u'timestamp_timeOfDay': {
                    'fieldname': u'timestamp',
                    'name': u'timestamp_timeOfDay',
                    'timeOfDay': (21, 0.5),
                    'type': 'DateEncoder'
                },
                u'timestamp_dayOfWeek': None,
                u'timestamp_weekend': None,
                u'consumption': {
                    'clipInput': True,
                    'fieldname': u'consumption',
                    'maxval': 100.0,
                    'minval': 0.0,
                    'n': 50,
                    'name': u'c1',
                    'type': 'ScalarEncoder',
                    'w': 21
                },
            },

            # A dictionary specifying the period for automatically-generated
            # resets from a RecordSensor;
            #
            # None = disable automatically-generated resets (also disabled if
            # all of the specified values evaluate to 0).
            # Valid keys are any combination of the following:
            # days, hours, minutes, seconds, milliseconds, microseconds, weeks
            #
            # Example for 1.5 days: sensorAutoReset = dict(days=1, hours=12)
            #
            # (value generated from SENSOR_AUTO_RESET)
            'sensorAutoReset': None,
        },

        'spEnable': True,
        'spParams': {
            # SP diagnostic output verbosity control;
            # 0: silent; >=1: some info; >=2: more info;
            'spVerbosity': 0,
            # Spatial Pooler implementation selector, see getSPClass
            # in py/regions/SPRegion.py for details
            # 'py', 'oldpy' (default), 'cpp' (speed optimized, new)
            'spatialImp': 'cpp',
            'globalInhibition': 1,
            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,
            'inputWidth': 0,
            # SP inhibition control (absolute value);
            # Maximum number of active columns in the SP region's output (when
            # there are more, the weaker ones are suppressed)
            'numActivePerInhArea': 40,
            'seed': 1956,
            # coincInputPoolPct
            # What percent of the column's receptive field is available
            # for potential synapses. At initialization time, we will
            # choose coincInputPoolPct * (2*coincInputRadius+1)^2
            'coincInputPoolPct': 0.5,
            # The default connected threshold. Any synapse whose
            # permanence value is above the connected threshold is
            # a "connected synapse", meaning it can contribute to the
            # cell's firing. Typical value is 0.10. Cells whose activity
            # level before inhibition falls below minDutyCycleBeforeInh
            # will have their own internal synPermConnectedCell
            # threshold set below this default value.
            # (This concept applies to both SP and TP and so 'cells'
            # is correct here as opposed to 'columns')
            'synPermConnected': 0.1,
            'synPermActiveInc': 0.1,
            'synPermInactiveDec': 0.005,
        },

        # Controls whether TP is enabled or disabled;
        # TP is necessary for making temporal predictions, such as predicting
        # the next inputs. Without TP, the model is only capable of
        # reconstructing missing sensor inputs (via SP).
        'tpEnable': True,
        'tpParams': {
            # TP diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            # (see verbosity in nupic/trunk/py/nupic/research/TP.py and TP10X*.py)
            'verbosity': 0,
            # Number of cell columns in the cortical region (same number for
            # SP and TP)
            # (see also tpNCellsPerCol)
            'columnCount': 2048,
            # The number of cells (i.e., states), allocated per column.
            'cellsPerColumn': 32,
            'inputWidth': 2048,
            'seed': 1960,
            # Temporal Pooler implementation selector (see _getTPClass in
            # CLARegion.py).
            'temporalImp': 'cpp',
            # New Synapse formation count
            # NOTE: If None, use spNumActivePerInhArea
            #
            # TODO: need better explanation
            'newSynapseCount': 20,
            # Maximum number of synapses per segment
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSynapsesPerSegment': 32,
            # Maximum number of segments per cell
            #  > 0 for fixed-size CLA
            # -1 for non-fixed-size CLA
            #
            # TODO: for Ron: once the appropriate value is placed in TP
            # constructor, see if we should eliminate this parameter from
            # description.py.
            'maxSegmentsPerCell': 128,
            # Initial Permanence
            # TODO: need better explanation
            'initialPerm': 0.21,
            # Permanence Increment
            'permanenceInc': 0.1,
            # Permanence Decrement
            # If set to None, will automatically default to tpPermanenceInc
            # value.
            'permanenceDec': 0.1,
            'globalDecay': 0.0,
            'maxAge': 0,
            # Minimum number of active synapses for a segment to be considered
            # during search for the best-matching segments.
            # None=use default
            # Replaces: tpMinThreshold
            'minThreshold': 9,
            # Segment activation threshold.
            # A segment is active if it has >= tpSegmentActivationThreshold
            # connected synapses that are active due to infActiveState
            # None=use default
            # Replaces: tpActivationThreshold
            'activationThreshold': 12,
            'outputType': 'normal',
            # "Pay Attention Mode" length. This tells the TP how many new
            # elements to append to the end of a learned sequence at a time.
            # Smaller values are better for datasets with short sequences,
            # higher values are better for datasets with long sequences.
            'pamLength': 1,
        },

        'clParams': {
            'regionName': 'CLAClassifierRegion',
            # Classifier diagnostic output verbosity control;
            # 0: silent; [1..6]: increasing levels of verbosity
            'clVerbosity': 0,
            # This controls how fast the classifier learns/forgets. Higher values
            # make it adapt faster and forget older patterns faster.
            'alpha': 0.005,
            # This is set after the call to updateConfigFromSubConfig and is
            # computed from the aggregationInfo and predictAheadTime.
            'steps': '1',
            'implementation': 'cpp',
        },

        'anomalyParams': {
            u'anomalyCacheRecords': None,
            u'autoDetectThreshold': None,
            u'autoDetectWaitRecords': 2184
        },

        'trainSPNetOnlyIfRequested': False,
    },
}
In [ ]:
from nupic.frameworks.opf.modelfactory import ModelFactory
model = ModelFactory.create(MODEL_PARAMS)
model.enableInference({'predictedField': 'consumption'})
In [ ]:
data = getData()
for _ in xrange(5):
    record = dict(zip(data.getFieldNames(), data.next()))
    print "input:      ", record["consumption"]
    result = model.run(record)
    print "prediction: ", result.inferences["multiStepBestPredictions"][1]
In [ ]:
print result
In [ ]:
print "anomaly score: ", result.inferences["anomalyScore"]
See Subutai's talk for more info on anomaly detection!

To run a full experiment from the command line:

python examples/opf/bin/OpfRunExperiment.py examples/opf/experiments/multistep/hotgym/

This outputs examples/opf/experiments/multistep/hotgym/inference/DefaultTask.TemporalMultiStep.predictionLog.csv

To swarm over model parameters:

python bin/run_swarm.py examples/opf/experiments/multistep/hotgym/permutations.py

This outputs examples/opf/experiments/multistep/hotgym/model_0/description.py