``````

In [31]:

import numpy
import pandas
from random import shuffle

``````
``````

In [14]:

import nupic
from nupic.encoders import ScalarEncoder

``````
``````

In [12]:

# 22 bits with 3 active representing values 0 to 100
# clipInput=True makes values >100 encode the same as 100 (instead of throwing a ValueError)
# forced=True allows small values for `n` and `w`
enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)

``````
``````

3 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
4 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
5 = [0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

``````
``````

In [267]:

# Encode maxval
print "100  =", enc.encode(100)
# See that any larger number gets the same encoding
print "1000 =", enc.encode(1000)

``````
``````

100  = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]
1000 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]

``````
``````

In [268]:

from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder

# IPython introspection: the trailing `?` displays the help for this class.
RandomDistributedScalarEncoder?

``````
``````

In [269]:

# 21 bits with 3 active with buckets of size 5
rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)

print "3 =   ", rdse.encode(3)
print "4 =   ", rdse.encode(4)
print "5 =   ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "1000 =", rdse.encode(1000)

``````
``````

3 =    [0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0]
4 =    [0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0]
5 =    [0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0]

100 =  [0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1]
1000 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0]

``````
``````

In [17]:

import datetime
from nupic.encoders.date import DateEncoder

# IPython introspection: the trailing `?` displays the help for this class.
DateEncoder?

``````
``````

In [29]:

import numpy
from nupic.research.spatial_pooler import SpatialPooler
sp = SpatialPooler(inputDimensions=(15,),
columnDimensions=(4,),
numActiveColumnsPerInhArea=1,
globalInhibition=True,
synPermActiveInc=0.03,
potentialPct=1.0)
for column in xrange(4):
connected = numpy.zeros((15,), dtype="int")
sp.getConnectedSynapses(column, connected)
print connected

``````
``````

[0 0 0 1 0 0 1 0 0 1 0 0 0 0 1]
[1 0 1 0 1 1 1 1 1 1 0 1 1 0 1]
[1 1 1 0 0 0 1 1 0 1 1 0 0 1 0]
[1 0 0 0 0 1 0 0 0 1 1 1 1 1 0]

``````

# Extract Data for Training Examples

``````

In [ ]:

``````

# Encode Date

```
Day of Week (14 bits)
[1 1 1 0 0 0 0 0 0 0 0 0 0 0] Sun
[0 0 1 1 1 0 0 0 0 0 0 0 0 0] Mon
[0 0 0 0 1 1 1 0 0 0 0 0 0 0] Tue
[0 0 0 0 0 0 1 1 1 0 0 0 0 0] Wed
[0 0 0 0 0 0 0 0 1 1 1 0 0 0] Thu
[0 0 0 0 0 0 0 0 0 0 1 1 1 0] Fri
[1 0 0 0 0 0 0 0 0 0 0 0 1 1] Sat

Day of Month (63 bits)
[111000000000000000000000000000000000000000000000000000000000000] 1
[001110000000000000000000000000000000000000000000000000000000000] 2
[000011100000000000000000000000000000000000000000000000000000000] 3
...
[000000000000000000000000000000000000000000000000000000000011100] 30
[000000000000000000000000000000000000000000000000000000000000111] 31

First/Last Day of Month (3 bits)
[1 0 0] First
[0 1 0] Neither
[0 0 1] Last

Week of Month (12 bits)
[1 1 1 0 0 0 0 0 0 0 0 0] Wk 1
[0 0 1 1 1 0 0 0 0 0 0 0] Wk 2
[0 0 0 0 1 1 1 0 0 0 0 0] Wk 3
[0 0 0 0 0 0 1 1 1 0 0 0] Wk 4
[0 0 0 0 0 0 0 0 1 1 1 0] Wk 5
[1 0 0 0 0 0 0 0 0 0 1 1] Wk 6

Month of Year (24 bits)
[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] Jan
[0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] Feb
[0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] Mar
[0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] Apr
[0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0] May
[0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0] Jun
[0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0] Jul
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0] Aug
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0] Sep
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0] Oct
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0] Nov
[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1] Dec

Quarter of Year (8 bits)
[1 1 1 0 0 0 0 0] Q1
[0 0 1 1 1 0 0 0] Q2
[0 0 0 0 1 1 1 0] Q3
[1 0 0 0 0 0 1 1] Q4

Half of Year (2 bits)
[1 0] H1
[0 1] H2

Year of Decade (20 bits)
[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] 0
[0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] 1
[0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0] 2
[0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0] 3
[0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0] 4
[0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0] 5
[0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0] 6
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0] 7
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0] 8
[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1] 9
```

``````

In [15]:

import nupic
from nupic.encoders import ScalarEncoder
day_of_week_enc = ScalarEncoder(w=3, minval=0, maxval=7, radius=1.5, periodic=True, name="dayOfWeek", forced=True)
for d in range(0, 7):
print str(day_of_week_enc.encode(d))

``````
``````

[1 1 0 0 0 0 0 0 0 0 0 0 0 1]
[0 1 1 1 0 0 0 0 0 0 0 0 0 0]
[0 0 0 1 1 1 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 1 1 0 0 0 0 0 0]
[0 0 0 0 0 0 0 1 1 1 0 0 0 0]
[0 0 0 0 0 0 0 0 0 1 1 1 0 0]
[0 0 0 0 0 0 0 0 0 0 0 1 1 1]

``````
``````

In [16]:

day_of_month_enc = ScalarEncoder(w=3, minval=1, maxval=31, radius=1.5, periodic=False, name="dayOfMonth", forced=True)
for d in range(1, 32):
print str(day_of_month_enc.encode(d)).replace("\n","").replace(" ","")

``````
``````

[111000000000000000000000000000000000000000000000000000000000000]
[001110000000000000000000000000000000000000000000000000000000000]
[000011100000000000000000000000000000000000000000000000000000000]
[000000111000000000000000000000000000000000000000000000000000000]
[000000001110000000000000000000000000000000000000000000000000000]
[000000000011100000000000000000000000000000000000000000000000000]
[000000000000111000000000000000000000000000000000000000000000000]
[000000000000001110000000000000000000000000000000000000000000000]
[000000000000000011100000000000000000000000000000000000000000000]
[000000000000000000111000000000000000000000000000000000000000000]
[000000000000000000001110000000000000000000000000000000000000000]
[000000000000000000000011100000000000000000000000000000000000000]
[000000000000000000000000111000000000000000000000000000000000000]
[000000000000000000000000001110000000000000000000000000000000000]
[000000000000000000000000000011100000000000000000000000000000000]
[000000000000000000000000000000111000000000000000000000000000000]
[000000000000000000000000000000001110000000000000000000000000000]
[000000000000000000000000000000000011100000000000000000000000000]
[000000000000000000000000000000000000111000000000000000000000000]
[000000000000000000000000000000000000001110000000000000000000000]
[000000000000000000000000000000000000000011100000000000000000000]
[000000000000000000000000000000000000000000111000000000000000000]
[000000000000000000000000000000000000000000001110000000000000000]
[000000000000000000000000000000000000000000000011100000000000000]
[000000000000000000000000000000000000000000000000111000000000000]
[000000000000000000000000000000000000000000000000001110000000000]
[000000000000000000000000000000000000000000000000000011100000000]
[000000000000000000000000000000000000000000000000000000111000000]
[000000000000000000000000000000000000000000000000000000001110000]
[000000000000000000000000000000000000000000000000000000000011100]
[000000000000000000000000000000000000000000000000000000000000111]

``````
``````

In [17]:

print len('111000000000000000000000000000000000000000000000000000000000000')

``````
``````

63

``````
``````

In [18]:

first_last_of_month_enc = ScalarEncoder(w=1, minval=0, maxval=2, radius=1, periodic=False, name="firstLastOfMonth", forced=True)
for d in range(0, 3):
print str(first_last_of_month_enc.encode(d))

``````
``````

[1 0 0]
[0 1 0]
[0 0 1]

``````
``````

In [19]:

week_of_month_enc = ScalarEncoder(w=3, minval=0, maxval=6, radius=1.5, periodic=True, name="weekOfMonth", forced=True)
for d in range(0, 6):
print str(week_of_month_enc.encode(d))

``````
``````

[1 1 0 0 0 0 0 0 0 0 0 1]
[0 1 1 1 0 0 0 0 0 0 0 0]
[0 0 0 1 1 1 0 0 0 0 0 0]
[0 0 0 0 0 1 1 1 0 0 0 0]
[0 0 0 0 0 0 0 1 1 1 0 0]
[0 0 0 0 0 0 0 0 0 1 1 1]

``````
``````

In [20]:

month_of_year_enc = ScalarEncoder(w=3, minval=1, maxval=13, radius=1.5, periodic=True, name="monthOfYear", forced=True)
for d in range(1, 13):
print str(month_of_year_enc.encode(d))

``````
``````

[1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
[0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]

``````
``````

In [21]:

quarter_of_year_enc = ScalarEncoder(w=3, minval=0, maxval=4, radius=1.5, periodic=True, name="quarterOfYear", forced=True)
for d in range(0, 4):
print str(quarter_of_year_enc.encode(d))

``````
``````

[1 1 0 0 0 0 0 1]
[0 1 1 1 0 0 0 0]
[0 0 0 1 1 1 0 0]
[0 0 0 0 0 1 1 1]

``````
``````

In [22]:

half_of_year_enc = ScalarEncoder(w=1, minval=0, maxval=2, radius=1, periodic=True, name="halfOfYear", forced=True)
for d in range(0, 2):
print str(half_of_year_enc.encode(d))

``````
``````

[1 0]
[0 1]

``````
``````

In [23]:

for d in range(0, 10):

``````
``````

[1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
[0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]

``````
``````

In [24]:

import calendar
class Month(object):
    """Calendar facts about the month that contains `date`.

    Attributes:
        first_day_of_week: weekday of the 1st of the month, as reported by
            calendar.monthrange (0 = Monday).
        last_day: number of days in the month.
        quarter: zero-based quarter of the year (0-3).
        half: zero-based half of the year (0-1).
    """

    def __init__(self, date):
        # monthrange yields (weekday of the 1st, days in the month).
        self.first_day_of_week, self.last_day = calendar.monthrange(date.year, date.month)
        zero_based_month = date.month - 1
        self.quarter = zero_based_month // 3
        self.half = zero_based_month // 6

``````
``````

In [25]:

def get_week_of_month(date):
    """Return the index of the calendar week of the month containing `date`.

    Weeks run Monday to Sunday. The index is the number of whole weeks between
    the Monday on or before the 1st and `date`. When the month itself starts
    on a Monday the index is shifted up by one, so index 0 is only used for a
    partial leading week.
    """
    first_weekday = Month(date).first_day_of_week
    week_index = (first_weekday + date.day - 1) // 7
    if first_weekday == 0:  # month starts on Monday
        return week_index + 1
    return week_index

# for m in range(1,13):
#     for d in range(1, calendar.monthrange(2015, m)[1] + 1):
#         date = datetime.date(2015, m, d)
#         print date, get_week_of_month(date)

``````
``````

In [26]:

import calendar
# (weekday of the 1st, number of days) for every month of 2008.
[calendar.monthrange(year, month)
 for year in (2008,)
 for month in range(1, 13)]

``````
``````

Out[26]:

[(1, 31),
(4, 29),
(5, 31),
(1, 30),
(3, 31),
(6, 30),
(1, 31),
(4, 31),
(0, 30),
(2, 31),
(5, 30),
(0, 31)]

``````
``````

In [27]:

import datetime
d = datetime.date(2015, 7, 1)
# d = datetime.date.today()
print 'dayOfWeek', d.weekday(), day_of_week_enc.encode(d.weekday())  # 0 = Monday, 6 = Sunday
print 'dayOfMonth', d.day, str(day_of_month_enc.encode(d.day)).replace("\n","").replace(" ","")
first_last = 0 if d.day == 1 else 2 if d.day == Month(d).last_day else 1
print 'firstLastOfMonth', first_last, first_last_of_month_enc.encode(first_last)
print 'weekOfMonth', get_week_of_month(d), week_of_month_enc.encode(get_week_of_month(d))
print 'monthOfYear', d.month, month_of_year_enc.encode(d.month)
print 'quarterOfYear', Month(d).quarter, quarter_of_year_enc.encode(Month(d).quarter)
print 'halfOfYear', Month(d).half, half_of_year_enc.encode(Month(d).half)
# Quarter of Year (8 bits)
# [1 1 1 0 0 0 0 0] Q1
# [0 0 1 1 1 0 0 0] Q2
# [0 0 0 0 1 1 1 0] Q3
# [1 0 0 0 0 0 1 1] Q4
# Half of Year
# [1 0] H1 (2 bits)
# [0 1] H2
class DateRecord(object):
    """Plain record exposing the encodable fields of a date as attributes.

    Attribute names match the `name` given to the corresponding encoders
    (dayOfWeek, dayOfMonth, firstLastOfMonth, weekOfMonth, monthOfYear,
    quarterOfYear, halfOfYear).
    """

    def __init__(self, date):
        month = Month(date)

        # 0 = first day of the month, 2 = last day, 1 = any day in between.
        if date.day == 1:
            position_in_month = 0
        elif date.day == month.last_day:
            position_in_month = 2
        else:
            position_in_month = 1

        self.dayOfWeek = date.weekday()
        self.dayOfMonth = date.day
        self.firstLastOfMonth = position_in_month
        self.weekOfMonth = get_week_of_month(date)
        self.monthOfYear = date.month
        self.quarterOfYear = month.quarter
        self.halfOfYear = month.half

``````
``````

dayOfWeek 2 [0 0 0 1 1 1 0 0 0 0 0 0 0 0]
dayOfMonth 1 [111000000000000000000000000000000000000000000000000000000000000]
firstLastOfMonth 0 [1 0 0]
weekOfMonth 0 [1 1 0 0 0 0 0 0 0 0 0 1]
yearOfDecade 5 [0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0]
monthOfYear 7 [0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0]
quarterOfYear 2 [0 0 0 1 1 1 0 0]
halfOfYear 1 [0 1]

``````
``````

In [32]:

# Generate all days from 1998-2012
# Repeat:
#   Shuffle the list randomly
#   Present each day to SP
#   Record output of SP in map from day to SDR
#   Keep track of whether the SDR changed from the previous presentation
#   Graph number of changes vs number of presentations
days = [datetime.date(y, m, d) for y in range(1998, 2013) for m in range(1, 13) for d in range(1, calendar.monthrange(y, m)[1] + 1)]
print days[0], days[1], days[-2], days[-1]

from nupic.encoders import MultiEncoder

# Combined date encoder. An empty MultiEncoder has no fields, so nothing
# could be encoded and `date_enc.width` could not size the spatial pooler's
# input. Register one sub-encoder per DateRecord attribute; each name must
# match the attribute it encodes.
date_enc = MultiEncoder()
for field_name, field_enc in (
        ("dayOfWeek", day_of_week_enc),
        ("dayOfMonth", day_of_month_enc),
        ("firstLastOfMonth", first_last_of_month_enc),
        ("weekOfMonth", week_of_month_enc),
        ("monthOfYear", month_of_year_enc),
        ("quarterOfYear", quarter_of_year_enc),
        ("halfOfYear", half_of_year_enc)):
    date_enc.addEncoder(field_name, field_enc)

sp = SpatialPooler(inputDimensions=(date_enc.width,),
columnDimensions=(400,),
numActiveColumnsPerInhArea=40,
globalInhibition=True,
synPermActiveInc=0.03,
potentialPct=1.0)

with open("output.txt", "w") as f:
output = numpy.zeros((400,), dtype="int")
prev_output = None
for pres in xrange(2):
shuffle(days)
print days[0], days[1], days[-2], days[-1]
for d in days:
#prev_output = None  # reset prev_output between days
for i in range(1):  # repeat each day to see if overlap goes up from learning
inp = numpy.concatenate(date_enc.encodeEachField(DateRecord(d)))
sp.compute(inp, learn=True, activeArray=output)
if prev_output:
nz = set(numpy.concatenate(numpy.nonzero(output)))
f.write(str(d) + " overlap = " + str(len(prev_output & nz)) + "\n")
prev_output = nz
else:
prev_output = set(numpy.concatenate(numpy.nonzero(output)))

``````
``````

1998-01-01 1998-01-02 2012-12-30 2012-12-31
2002-01-21 2003-07-16 2012-03-02 2004-12-07
1998-12-28 2006-12-04 2009-08-11 2008-05-04

``````

TODO

- Run 500 presentations with shuffling to refine the training.
- Run 1 presentation without shuffling to capture a baseline.
- Run 1 presentation without shuffling to capture the test results.
- Compare the baseline and test results.

``````

In [225]:

days = [datetime.date(y, m, d) for y in range(1998, 2013) for m in range(1, 13) for d in range(1, calendar.monthrange(y, m)[1] + 1)]

with open("output.txt", "w") as f:
output = numpy.zeros((400,), dtype="int")
prev_output = None
for pres in xrange(5000):
shuffle(days)
#print days[0], days[1], days[-2], days[-1]
for d in days:
#prev_output = None  # reset prev_output between days
for i in range(1):  # repeat each day to see if overlap goes up from learning
inp = numpy.concatenate(date_enc.encodeEachField(DateRecord(d)))
sp.compute(inp, learn=True, activeArray=output)
if prev_output:
nz = set(numpy.concatenate(numpy.nonzero(output)))
f.write(str(d) + " overlap = " + str(len(prev_output & nz)) + "\n")
prev_output = nz
else:
prev_output = set(numpy.concatenate(numpy.nonzero(output)))
print 'Done'

``````
``````

Done

``````
``````

In [193]:

days = [datetime.date(y, m, d) for y in range(1998, 2013) for m in range(1, 13) for d in range(1, calendar.monthrange(y, m)[1] + 1)]

with open("output.txt", "w") as f:
prev_output = None
shuffle(days)
print days[0], days[1], days[-2], days[-1]
for d in days:
for i in range(2):
output = numpy.concatenate(date_enc.encodeEachField(DateRecord(d)))
if prev_output:
nz = set(numpy.concatenate(numpy.nonzero(output)))
f.write(str(d) + " overlap = " + str(len(prev_output & nz)) + "\n")
prev_output = nz
else:
prev_output = set(numpy.concatenate(numpy.nonzero(output)))

``````
``````

2002-01-30 2010-09-12 2011-07-03 2005-10-30

``````
``````

In [ ]:

# Every calendar day from 1998-01-01 through 2012-12-31, in order.
days = [datetime.date(y, m, d) for y in range(1998, 2013) for m in range(1, 13) for d in range(1, calendar.monthrange(y, m)[1] + 1)]

# Write three header rows (name, type, flags) followed by one encoded row per
# day. The type row previously ended with "\b" (a backspace character)
# instead of "\n", which merged it with the flags row.
with open("dates.csv", "w") as f:
    f.write("date\n")  # field name
    f.write("type\n")  # field type
    f.write("\n")  # flags
    for d in days:
        # NOTE(review): str() of a long numpy array is wrapped over several
        # lines, so each day may span multiple lines in the file - confirm
        # that is the intended layout.
        f.write(str(numpy.concatenate(date_enc.encodeEachField(DateRecord(d)))) + "\n")

``````
``````

In [261]:

%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

def encode_date(date):
    """Return the encodings of all date fields of `date` as one flat array."""
    record = DateRecord(date)
    field_encodings = date_enc.encodeEachField(record)
    return numpy.concatenate(field_encodings)

def encode_bits(array):
    """Return the set of indices at which `array` is non-zero."""
    index_arrays = numpy.nonzero(array)  # tuple with one index array per axis
    all_indices = numpy.concatenate(index_arrays)
    return set(all_indices)

def encode_date_bits(date):
    """Return the set of active bit indices in the encoding of `date`."""
    encoded = encode_date(date)
    return encode_bits(encoded)

# Reference day whose encoding every other day is compared against.
year = 2015
day = datetime.date(year, 5, 31)
#jan1 = datetime.date(year, 1, 1)

# All days of the previous, current and following year.
days_in_year = []
for y in (year - 1, year, year + 1):
    for m in range(1, 13):
        month_length = calendar.monthrange(y, m)[1]
        days_in_year.extend(datetime.date(y, m, dom) for dom in range(1, month_length + 1))
#days_in_year = [datetime.date(y, m, d) for y in range(year-15,year+16) for m in range(1,13) for d in range(1, calendar.monthrange(y, m)[1] + 1)]

# Number of encoder bits each day shares with the reference day.
day_bits = encode_date_bits(day)
overlap = []
for date in days_in_year:
    overlap.append(len(day_bits & encode_date_bits(date)))
plt.plot(days_in_year, overlap)

``````
``````

Out[261]:

[<matplotlib.lines.Line2D at 0x7f587ca51bd0>]

``````
``````

In [262]:

# Same comparison as the encoder-level plot, but on the trained pooler's
# active columns. Learning is off so the pooler is not modified.
output = numpy.zeros((400,), dtype="int")


def _sp_active_columns(date):
    """Run `sp` on `date` without learning and return its active columns."""
    sp.compute(encode_date(date), learn=False, activeArray=output)
    return encode_bits(output)


sp_day_bits = _sp_active_columns(day)
overlap = [len(sp_day_bits & _sp_active_columns(date)) for date in days_in_year]
plt.plot(days_in_year, overlap)

``````
``````

Out[262]:

[<matplotlib.lines.Line2D at 0x7f587c980150>]

``````
``````

In [229]:

# sp2 = SpatialPooler(inputDimensions=(date_enc.width,),
#                    columnDimensions=(400,),
#                    numActiveColumnsPerInhArea=40,
#                    globalInhibition=True,
#                    synPermActiveInc=0.03,
#                    potentialPct=1.0)

output = numpy.zeros((400,), dtype="int")
sp2.compute(encode_date(day), learn=True, activeArray=output)
sp_day_bits = encode_bits(output)
overlap = []
for date in days_in_year:
sp2.compute(encode_date(date), learn=False, activeArray=output)
overlap.append(len(sp_day_bits & encode_bits(output)))
print max(overlap)
plt.plot(days_in_year, overlap)

``````
``````

40

Out[229]:

[<matplotlib.lines.Line2D at 0x7f587e3469d0>]

``````
``````

In [224]:

import pickle
# Persist the trained spatial pooler. The file is opened in binary mode:
# pickle data is a byte stream, and text mode can corrupt it on platforms
# that translate line endings.
with open("sp_date_500.p", "wb") as f:
    pickle.dump(sp, f)

``````
``````

In [132]:

# Plotting smoke test: sin(x^2) sampled at 500 points on [0, 3*pi].
x = np.linspace(0, 3*np.pi, 500)
chirp = np.sin(x**2)
plt.plot(x, chirp)
plt.title('A simple chirp')
plt.show()

``````
``````

``````

# Encode Open, High, Low, Close

Encode day-on-day differences in each descriptor: Open-Open, High-High, Low-Low, Close-Close.

``````

In [33]:

import nupic
from nupic.encoders import ScalarEncoder
enc = ScalarEncoder(n=45, w=5, minval=-20, maxval=20, clipInput=True, forced=True)
# range for price descriptors
mrange = (-20, 20)  # not interested in specifics of daily price changes up or down greater than \$20
precision = 1.0  # not interested in price changes of less than \$1.00
for i in range(-20, 21):
print str(enc.encode(i)).replace(" ", "").replace("\n", "") + "=" + str(i)

``````
``````

[111110000000000000000000000000000000000000000]=-20
[011111000000000000000000000000000000000000000]=-19
[001111100000000000000000000000000000000000000]=-18
[000111110000000000000000000000000000000000000]=-17
[000011111000000000000000000000000000000000000]=-16
[000001111100000000000000000000000000000000000]=-15
[000000111110000000000000000000000000000000000]=-14
[000000011111000000000000000000000000000000000]=-13
[000000001111100000000000000000000000000000000]=-12
[000000000111110000000000000000000000000000000]=-11
[000000000011111000000000000000000000000000000]=-10
[000000000001111100000000000000000000000000000]=-9
[000000000000111110000000000000000000000000000]=-8
[000000000000011111000000000000000000000000000]=-7
[000000000000001111100000000000000000000000000]=-6
[000000000000000111110000000000000000000000000]=-5
[000000000000000011111000000000000000000000000]=-4
[000000000000000001111100000000000000000000000]=-3
[000000000000000000111110000000000000000000000]=-2
[000000000000000000011111000000000000000000000]=-1
[000000000000000000001111100000000000000000000]=0
[000000000000000000000111110000000000000000000]=1
[000000000000000000000011111000000000000000000]=2
[000000000000000000000001111100000000000000000]=3
[000000000000000000000000111110000000000000000]=4
[000000000000000000000000011111000000000000000]=5
[000000000000000000000000001111100000000000000]=6
[000000000000000000000000000111110000000000000]=7
[000000000000000000000000000011111000000000000]=8
[000000000000000000000000000001111100000000000]=9
[000000000000000000000000000000111110000000000]=10
[000000000000000000000000000000011111000000000]=11
[000000000000000000000000000000001111100000000]=12
[000000000000000000000000000000000111110000000]=13
[000000000000000000000000000000000011111000000]=14
[000000000000000000000000000000000001111100000]=15
[000000000000000000000000000000000000111110000]=16
[000000000000000000000000000000000000011111000]=17
[000000000000000000000000000000000000001111100]=18
[000000000000000000000000000000000000000111110]=19
[000000000000000000000000000000000000000011111]=20

``````

# Encode Volume

Encode volume directly as it already represents a rate of change in ownership.

``````

In [39]:

import csv
import glob
import os
import sys
quote_path = "quotes/quantquote_daily_sp500_83986/daily/"
min_volume = sys.maxsize
max_volume = 0
for path in glob.iglob(os.path.join(quote_path, "*")):
if path.endswith(".csv"):
with open(path, 'rb') as csvfile:
volume = float(quote_line[-1])
if volume > max_volume:
max_volume = volume
if volume < min_volume:
min_volume = volume
print "min_volume=", str(min_volume)
print "max_volume=", str(max_volume)

``````
``````

min_volume= 0.0
max_volume= 7768849900.0

``````
``````

In [37]:

# Parse an ISO-style date string (YYYY-MM-DD) into a datetime.date.
datetime.datetime.strptime(
    "2015-05-27",
    "%Y-%m-%d",
).date()

``````
``````

Out[37]:

datetime.date(2015, 5, 27)

``````