In [1]:
import os, sys, inspect

# for gioia to load aiohttp: machine-specific site-packages directory.
# Only added when it actually exists, so the cell is a no-op on other machines.
gioia_site_packages = '/Users/maggiori/anaconda/envs/py35/lib/python3.5/site-packages'
if os.path.isdir(gioia_site_packages) and gioia_site_packages not in sys.path:
    sys.path.append(gioia_site_packages)

# to import modules locally without having installed the entire package
# http://stackoverflow.com/questions/714063/importing-modules-from-parent-folder
# A notebook cell has no real source file, so inspect.getfile() on the current
# frame is not a reliable way to find the notebook's directory. The working
# directory is used instead; the rest of this notebook already resolves its
# relative paths ('data/...', '../db_files') against it.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import time
import signal
import subprocess
import numpy as np
from scipy.stats import norm
import json
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook')

In [2]:
from timeseries import *
from webserver import *


Generating LALR tables

In [3]:
def tsmaker(m, s, j):
    '''
    Helper function: randomly generates a time series for testing.

    Parameters
    ----------
    m : float
        Mean of the normal pdf used to generate the time series values
    s : float
        Standard deviation of the normal pdf used to generate the values
    j : float
        Quantifies the "jitter": scale of the standard-normal noise that is
        added to the time series values

    Returns
    -------
    meta : dict
        Randomly generated metadata with keys 'order' (int between -5 and 5)
        and 'blarg' (int, 1 or 2)
    ts : TimeSeries
        Time series whose times are 0.00, 0.01, ..., 0.99 and whose values
        are the normal pdf evaluated at those times plus the jitter
    '''

    # generate metadata (converted to plain ints rather than numpy integers)
    meta = {}
    meta['order'] = int(np.random.choice(
        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]))
    meta['blarg'] = int(np.random.choice([1, 2]))

    # generate time series data
    t = np.arange(0.0, 1.0, 0.01)
    # size the noise from the time grid itself instead of a global defined
    # elsewhere in the notebook, so times and values always have equal length
    v = norm.pdf(t, m, s) + j * np.random.randn(len(t))

    # return metadata first, then the time series
    return meta, TimeSeries(t, v)

In [4]:
# read the price series of the stocks that were excluded from the database
with open('data/prices_exclude.json') as price_file:
    stock_data_exclude = json.load(price_file)

# ticker symbols of the excluded stocks (used further down as search queries)
stocks_exclude = [symbol for symbol in stock_data_exclude]

# number of market days in the year, taken from the first excluded stock
first_symbol = stocks_exclude[0]
num_days = len(stock_data_exclude[first_symbol])

Time series database & similarity searches

Gioia Dominedo
Nicolas Drizard
Kendrick Lo

Persistence architecture

Persistence demonstration

Initialize file system


In [5]:
# database parameters
ts_length = 100             # passed to the server as --ts_length
data_dir = '../db_files'    # passed to the server as --data_dir
db_name = 'default'         # passed to the server as --db_name

# directory that holds this database's files (with trailing slash)
dir_path = '/'.join([data_dir, db_name, ''])

In [6]:
# clear file system for testing
# exist_ok=True replaces the check-then-create pattern, so the cell is safe to
# re-run and cannot fail if the directory appears between check and creation
os.makedirs(dir_path, exist_ok=True)

# remove every entry in the database directory so the demo starts from empty
filelist = [os.path.join(dir_path, f) for f in os.listdir(dir_path)]
for f in filelist:
    os.remove(f)

Initialize database


In [7]:
# start the persistent database server as a child process
server_cmd = ['python', '../go_server_persistent.py',
              '--ts_length', str(ts_length),
              '--data_dir', data_dir,
              '--db_name', db_name]
server = subprocess.Popen(server_cmd)
time.sleep(5)  # fixed pause so the server can finish loading

# start the webserver as a second child process
webserver_cmd = ['python', '../go_webserver.py']
webserver = subprocess.Popen(webserver_cmd)
time.sleep(5)  # fixed pause so the webserver can finish loading

# client object used by the rest of the notebook to talk to the database
web_interface = WebInterface()

In [8]:
# check that the database is empty: the database directory was cleared before
# the server was started, so a select() with no arguments should return an
# empty result (displayed as the cell output)
web_interface.select()


Out[8]:
OrderedDict()

Generate data


In [9]:
# draw the random generation parameters for the sample time series
num_ts = 50
mus = np.random.uniform(low=0.0, high=1.0, size=num_ts)
sigs = np.random.uniform(low=0.05, high=0.4, size=num_ts)
jits = np.random.uniform(low=0.05, high=0.2, size=num_ts)

# containers for the primary keys, the time series and their metadata
primary_keys, tsdict, metadict = [], {}, {}

# build one randomly generated database entry per parameter triple
for idx, (mu, sig, jit) in enumerate(zip(mus, sigs, jits)):
    entry_meta, entry_ts = tsmaker(mu, sig, jit)
    pk = "ts-{}".format(idx)
    primary_keys.append(pk)
    tsdict[pk] = entry_ts
    metadict[pk] = entry_meta

# sorted primary keys, to assist with later testing
ts_keys = sorted(tsdict)

# randomly choose distinct time series as vantage points, and derive the
# name of the distance field associated with each of them
num_vps = 5
vpkeys = list(np.random.choice(ts_keys, size=num_vps, replace=False))
vpdist = ['d_vp_{}'.format(vp) for vp in vpkeys]

Insert data


In [10]:
# register the 'stats' trigger on 'insert_ts' with target fields mean and std
# (done before inserting so that it runs for every series added below)
web_interface.add_trigger('stats', 'insert_ts', ['mean', 'std'], None)

# first pass: insert every time series
for pk, series in tsdict.items():
    web_interface.insert_ts(pk, series)

# second pass: upsert the metadata, in the same key order as the inserts
for pk in tsdict:
    web_interface.upsert_meta(pk, metadict[pk])

# finally mark the chosen series as vantage points
for vp in vpkeys[:num_vps]:
    web_interface.insert_vp(vp)

Inspect data

Are all the entries there?


In [11]:
# fetch every database entry (fields=[] requests all metadata fields)
results = web_interface.select(fields=[])

# the database holds exactly as many entries as were generated
assert len(results) == num_ts

# and the primary keys are exactly the generated ones
returned_keys = sorted(results.keys())
assert returned_keys == ts_keys

Does the data match?


In [12]:
# compare each stored entry against the locally generated data
for pk, expected_ts in tsdict.items():
    # the stored time series must equal the generated one
    results = web_interface.select(fields=['ts'], md={'pk': pk})
    assert results[pk]['ts'] == expected_ts

    # every generated metadata field must have the same stored value
    results = web_interface.select(fields=[], md={'pk': pk})
    for field, expected_value in metadict[pk].items():
        assert expected_value == results[pk][field]

Did the triggers work?


In [13]:
# check that the trigger has executed as expected (allowing for rounding errors)
# An absolute tolerance is used instead of rounding both sides and comparing:
# two nearly identical values can round to different 4-decimal results when
# they fall on either side of a rounding boundary.
for k in tsdict:
    results = web_interface.select(fields=['mean', 'std'], md={'pk': k})
    np.testing.assert_allclose(results[k]['mean'], tsdict[k].mean(),
                               rtol=0, atol=1e-4,
                               err_msg='mean mismatch for ' + k)
    np.testing.assert_allclose(results[k]['std'], tsdict[k].std(),
                               rtol=0, atol=1e-4,
                               err_msg='std mismatch for ' + k)

Were the vantage points inserted?


In [14]:
# show the vantage points chosen locally next to those flagged in the database
print('Vantage points selected:', vpkeys)
vp_entries = web_interface.select(fields=None, md={'vp': True},
                                  additional={'sort_by': '+pk'})
print('Vantage points in database:', vp_entries.keys(), '\n')

# show that the vantage point distance fields exist, using the first entry
# (by primary key) as an example
first_entry_only = {'sort_by': '+pk', 'limit': 1}
results = web_interface.select(fields=vpdist, additional=first_entry_only)
print('Vantage point distance fields:', vpdist)
print('Vantage point distance example:', results, sep='\n')


Vantage points selected: ['ts-38', 'ts-22', 'ts-23', 'ts-47', 'ts-35']
Vantage points in database: odict_keys(['ts-22', 'ts-23', 'ts-35', 'ts-38', 'ts-47']) 

Vantage point distance fields: ['d_vp_ts-38', 'd_vp_ts-22', 'd_vp_ts-23', 'd_vp_ts-47', 'd_vp_ts-35']
Vantage point distance example:
OrderedDict([('ts-0', OrderedDict([('d_vp_ts-38', 0.4820992648601532), ('d_vp_ts-23', 0.4757295548915863), ('d_vp_ts-22', 0.7358911633491516), ('d_vp_ts-35', 0.847390353679657), ('d_vp_ts-47', 0.8995630145072937)]))])

Terminate database


In [15]:
os.kill(server.pid, signal.SIGINT)
time.sleep(5)  # give it time to terminate
os.kill(webserver.pid, signal.SIGINT)
time.sleep(5)  # give it time to terminate
web_interface = None

Reload database


In [16]:
# restart the database server against the same data directory and database
# name, so that it loads what the previous instance left on disk
server_cmd = ['python', '../go_server_persistent.py',
              '--ts_length', str(ts_length),
              '--data_dir', data_dir,
              '--db_name', db_name]
server = subprocess.Popen(server_cmd)
time.sleep(5)  # give it time to load fully

# restart the webserver and create a fresh client
webserver_cmd = ['python', '../go_webserver.py']
webserver = subprocess.Popen(webserver_cmd)
time.sleep(5)  # give it time to load fully
web_interface = WebInterface()

... once more, with feeling!

Are all the entries there?


In [17]:
# fetch every entry from the reloaded database (fields=[] requests all
# metadata fields)
results = web_interface.select(fields=[])

# the reloaded database still holds every generated entry
assert len(results) == num_ts

# and the primary keys are unchanged
returned_keys = sorted(results.keys())
assert returned_keys == ts_keys

Does the data match?


In [18]:
# compare each reloaded entry against the locally generated data
for pk, expected_ts in tsdict.items():
    # the reloaded time series must equal the generated one
    results = web_interface.select(fields=['ts'], md={'pk': pk})
    assert results[pk]['ts'] == expected_ts

    # every generated metadata field must have the same reloaded value
    results = web_interface.select(fields=[], md={'pk': pk})
    for field, expected_value in metadict[pk].items():
        assert expected_value == results[pk][field]

Were the vantage points re-loaded?


In [19]:
# show the vantage points chosen locally next to those flagged in the
# reloaded database
print('Vantage points selected:', vpkeys)
vp_entries = web_interface.select(fields=None, md={'vp': True},
                                  additional={'sort_by': '+pk'})
print('Vantage points in database:', vp_entries.keys())


Vantage points selected: ['ts-38', 'ts-22', 'ts-23', 'ts-47', 'ts-35']
Vantage points in database: odict_keys(['ts-22', 'ts-23', 'ts-35', 'ts-38', 'ts-47'])

Does the trigger still work?


In [20]:
# check that the trigger survived the reload by inserting new data

# draw random generation parameters and build a test time series
test_mu = np.random.uniform(low=0.0, high=1.0)
test_sig = np.random.uniform(low=0.05, high=0.4)
test_jit = np.random.uniform(low=0.05, high=0.2)
_, test = tsmaker(test_mu, test_sig, test_jit)

# insert it under the primary key 'test'
web_interface.insert_ts('test', test)

# display the mean and standard deviation fields for the new entry
web_interface.select(fields=['mean', 'std'], md={'pk': 'test'})


Out[20]:
OrderedDict([('test',
              OrderedDict([('mean', 0.5932886600494385),
                           ('std', 0.8402645587921143)]))])

Terminate database


In [21]:
# terminate processes before exiting
os.kill(server.pid, signal.SIGINT)
time.sleep(5)  # give it time to terminate
server.poll()  # non-blocking: collects the exit status if it has terminated
web_interface = None

# terminate() only sends the signal; wait for the webserver to actually exit
# so it is reaped and no longer running when a later cell starts a new one
webserver.terminate()
try:
    webserver.wait(timeout=5)
except subprocess.TimeoutExpired:
    # it did not exit in time: force it, then collect the exit status
    webserver.kill()
    webserver.wait()

Additional Feature:

iSAX Similarity Searches

  • Time series T of length n can be converted into a SAX representation
    • approximates T using a smaller number of segments (e.g. w=4)
    • each segment is represented by a discrete number
    • result is a SAX word (i.e. a vector) such as {11, 11, 01, 00}

  • SAX representation can be used as an index
    • e.g. fixed cardinality of 8, word length of 4
    • An example T may map to $\{6^8, 6^8, 3^8, 0^8\}$
      • data for all Ts that can be represented by this SAX word can be stored in the same text file on disk (e.g. with name 6.8_6.8_3.8_0.8.txt)
  • Problem: storage imbalance
  • Solution: introduce a threshold for the number of time series that can be stored in a single file
    • If an insertion would cause threshold to be exceeded, split the file
  • The diagram below illustrates an iSAX index as a tree
    • root node: represents complete SAX space
    • terminal node: leaf node containing pointer to file on disk
      • SAX word as index, contents are the actual time series data
    • internal node (new): designates split in SAX space
      • created when number of entries in a terminal node exceeds threshold

  • Observations:
    • binary splits are along one dimension (sequentially)
      • creates two new words of increased cardinality
    • “new” node splitting policy (iSAX 2.0) purports to provide better balance by determining the optimal dimension along which to split
      • checks whether mean value is close to a breakpoint
  • However:
    • balancing problems still exist
    • no justification given for binary splits
      • note that root is connected to multiple nodes

  • We implemented iSAX tree as a “true” n-ary tree
    • splits series in a “full” terminal node into up to ‘n’ terminal nodes all located at the same depth in the tree
    • achieves better balance, faster traversals
  • Class methods:
    • insert, delete, preorder (x2), similarity search (find_nbr)
  • Similarity search is “approximate”
    • intuition is that two similar time series are often represented by the same iSAX word
    • natural clustering
    • “ties” broken by computing Euclidean distance (but only for neighbors)
      • adjust for sparser nodes by implementing search for series with common parent

Stock market example

Here's one we prepared earlier...


In [22]:
# start the database server on the pre-built 'stock_prices' database, whose
# time series have one value per market day
server_cmd = ['python', '../go_server_persistent.py',
              '--ts_length', str(num_days),
              '--data_dir', '../db_files',
              '--db_name', 'stock_prices']
server = subprocess.Popen(server_cmd)
time.sleep(5)  # make sure it loads completely

# start the database webserver
webserver_cmd = ['python', '../go_webserver.py']
webserver = subprocess.Popen(webserver_cmd)
time.sleep(5)  # make sure it loads completely

# client object used to query the stock database
web_interface = WebInterface()

Is there any data?


In [23]:
# select every entry, then display how many stocks the database holds
all_stocks = web_interface.select()
len(all_stocks)


Out[23]:
350

What does it look like?


In [24]:
# display the time series of the first 10 stocks, ordered by primary key
first_ten = {'sort_by': '+pk', 'limit': 10}
web_interface.select(fields=['ts'], additional=first_ten)


Out[24]:
OrderedDict([('A',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [25.55, ..., 28.56]))])),
             ('AA',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [12.56, ..., 10.57]))])),
             ('ABC',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [21.05, ..., 28.92]))])),
             ('ABT',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [45.39, ..., 49.34]))])),
             ('ADI',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [28.42, ..., 29.62]))])),
             ('ADM',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [28.67, ..., 30.52]))])),
             ('AEE',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [27.05, ..., 27.14]))])),
             ('AEP',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [31.49, ..., 34.82]))])),
             ('AES',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [14.26, ..., 10.4]))])),
             ('AET',
              OrderedDict([('ts',
                            TimeSeries(Length: 245 [29.62, ..., 27.11]))]))])

Can we run a vantage point search?


In [25]:
# choose one of the excluded stocks at random as the query
stock = np.random.choice(stocks_exclude)
print('Stock:', stock)

# build the query series and ask for the single closest match using the
# vantage point similarity search
query_ts = TimeSeries(range(num_days), stock_data_exclude[stock])
result = web_interface.vp_similarity_search(query_ts, 1)

# the first key of the result is the matching stock; fetch its time series
stock_match = list(result)[0]
match_entry = web_interface.select(fields=['ts'], md={'pk': stock_match})
stock_ts = match_entry[stock_match]['ts']
print('Most similar stock:', stock_match)


Stock: APC
Most similar stock: CF

How do the stocks compare?


In [26]:
# plot the query stock and its closest match on the same axes
query_label = 'Query:' + stock
match_label = 'Result:' + stock_match
plt.plot(stock_data_exclude[stock], label=query_label)
plt.plot(stock_ts.values(), label=match_label)

# no x tick marks are drawn; label and title the figure
plt.xticks([])
plt.legend(loc='best')
plt.title('Daily Stock Price Similarity')
plt.show()


Can we run an iSAX tree similarity search?


In [27]:
# choose one of the excluded stocks at random as the query
stock = np.random.choice(stocks_exclude)
print('Stock:', stock)

# build the query series and run the iSAX tree similarity search
query_ts = TimeSeries(range(num_days), stock_data_exclude[stock])
result = web_interface.isax_similarity_search(query_ts)

# the search reports a miss with an error string instead of a result
if result != 'ERROR: NO_MATCH':
    stock_match = list(result)[0]
    match_entry = web_interface.select(fields=['ts'], md={'pk': stock_match})
    stock_ts = match_entry[stock_match]['ts']
    print('Most similar stock:', stock_match)
else:
    print('Could not find a similar stock.')


Stock: TMK
Most similar stock: TJX

How do the stocks compare?


In [28]:
# visualize similarity
plt.plot(stock_data_exclude[stock], label='Query:' + stock)

# `result` still holds the response of the iSAX search for this query. When
# that search found no match, stock_ts/stock_match are left over from the
# earlier vantage point search on a different query, so they must not be
# plotted as if they were the result for this stock.
if result != 'ERROR: NO_MATCH':
    plt.plot(stock_ts.values(), label='Result:' + stock_match)

plt.xticks([])
plt.legend(loc='best')
plt.title('Daily Stock Price Similarity')
plt.show()


How do the methods compare?


In [29]:
# choose one of the excluded stocks at random and build the query series
# parameters shared by both searches
stock = np.random.choice(stocks_exclude)
print('Stock:', stock)
query_prices = stock_data_exclude[stock]

# vantage point similarity search: single closest match
result = web_interface.vp_similarity_search(TimeSeries(range(num_days), query_prices), 1)
match_vp = list(result)[0]
vp_entry = web_interface.select(fields=['ts'], md={'pk': match_vp})
ts_vp = vp_entry[match_vp]['ts']
print('VP search result:', match_vp)

# iSAX similarity search on the same query; a miss is reported with an
# error string instead of a result
result = web_interface.isax_similarity_search(TimeSeries(range(num_days), query_prices))
if result != 'ERROR: NO_MATCH':
    match_isax = list(result)[0]
    isax_entry = web_interface.select(fields=['ts'], md={'pk': match_isax})
    ts_isax = isax_entry[match_isax]['ts']
    print('iSAX search result:', match_isax)
else:
    print('iSAX search result: Could not find a similar stock.')


Stock: APC
VP search result: CF
iSAX search result: GIS

Show me another graph!


In [30]:
# visualize similarity
plt.plot(stock_data_exclude[stock], label='Query:' + stock)
plt.plot(ts_vp.values(), label='Result:' + match_vp)

# `result` still holds the response of the iSAX search for this query. When
# that search found no match, ts_isax/match_isax were never assigned, so the
# iSAX curve is only drawn when a match exists.
if result != 'ERROR: NO_MATCH':
    plt.plot(ts_isax.values(), label='Result:' + match_isax)

plt.xticks([])
plt.legend(loc='best')
plt.title('Daily Stock Price Similarity')
plt.show()


What is an iSAX tree, really?


In [31]:
# print the text rendering of the iSAX tree returned by the database; in the
# output below, each line shows a node's SAX word followed by a count and a
# list of stock symbols, and deeper arrows mark deeper levels of the tree
print(web_interface.isax_tree())


root
--->['01', '00', '01', '11']: 0 []
------>['011', '001', '011', '110']: 1 ['COG']
------>['011', '000', '011', '111']: 1 ['PFE']
------>['011', '001', '010', '111']: 5 ['AA', 'AKS', 'CF', 'CSC', 'KG']
------>['010', '001', '011', '111']: 2 ['BAX', 'NOV']
------>['011', '001', '011', '111']: 1 ['CNX']
--->['11', '10', '01', '00']: 0 []
------>['110', '101', '010', '001']: 5 ['BLL', 'CAT', 'EMC', 'MCK', 'RSG']
------>['111', '100', '010', '001']: 5 ['ABC', 'D', 'EOG', 'RHT', 'WEC']
------>['111', '101', '010', '000']: 2 ['AVB', 'TIE']
------>['111', '101', '010', '001']: 0 []
--------->['1110', '1010', '0100', '0010']: 0 []
------------>['11101', '10101', '01000', '00100']: 1 ['Q']
------------>['11100', '10100', '01000', '00101']: 2 ['HRL', 'MCD']
------------>['11100', '10101', '01001', '00100']: 1 ['BF.B']
------------>['11100', '10101', '01000', '00100']: 1 ['PXD']
------------>['11101', '10100', '01001', '00100']: 1 ['VTR']
--------->['1110', '1010', '0101', '0010']: 1 ['CPB']
------>['110', '100', '010', '001']: 1 ['SO']
------>['110', '101', '011', '000']: 2 ['AZO', 'NEM']
------>['110', '101', '010', '000']: 2 ['CRM', 'TWC']
--->['11', '10', '00', '00']: 0 []
------>['111', '101', '001', '001']: 0 []
--------->['1110', '1011', '0011', '0010']: 4 ['AIV', 'EQR', 'HAS', 'PSA']
--------->['1110', '1010', '0010', '0011']: 3 ['LXK', 'VAR', 'WAT']
--------->['1110', '1011', '0010', '0011']: 3 ['HOT', 'LTD', 'UNP']
--------->['1110', '1010', '0011', '0010']: 3 ['CLX', 'SLE', 'VNO']
------>['111', '100', '001', '001']: 0 []
--------->['1110', '1001', '0011', '0011']: 3 ['HSP', 'MO', 'NU']
--------->['1110', '1001', '0010', '0011']: 1 ['TEG']
--------->['1110', '1001', '0011', '0010']: 3 ['MKC', 'SYY', 'WPI']
--------->['1110', '1000', '0011', '0011']: 2 ['DE', 'DTE']
------>['110', '101', '001', '001']: 5 ['BXP', 'CBG', 'GPC', 'MAR', 'NKE']
--->['11', '01', '01', '01']: 0 []
------>['111', '010', '010', '011']: 4 ['CNP', 'POM', 'SJM', 'UNH']
------>['110', '010', '011', '011']: 3 ['ADI', 'CTL', 'PCG']
------>['111', '011', '010', '010']: 4 ['CMS', 'PNW', 'TE', 'XEL']
------>['111', '010', '010', '010']: 2 ['BMY', 'ED']
------>['110', '011', '010', '011']: 1 ['SEE']
--->['11', '10', '00', '01']: 0 []
------>['111', '100', '001', '010']: 0 []
--------->['1110', '1000', '0010', '0100']: 1 ['DVA']
--------->['1110', '1001', '0010', '0100']: 0 []
------------>['11101', '10010', '00100', '01000']: 1 ['EL']
------------>['11100', '10010', '00101', '01001']: 1 ['COH']
------------>['11101', '10011', '00101', '01000']: 1 ['GR']
------------>['11101', '10010', '00100', '01001']: 1 ['HNZ']
------------>['11100', '10011', '00101', '01000']: 1 ['PNC']
------------>['11101', '10011', '00100', '01000']: 1 ['ATI']
--------->['1110', '1000', '0011', '0100']: 2 ['LH', 'SCG']
--------->['1110', '1001', '0010', '0101']: 3 ['CBS', 'DOV', 'EMR']
--------->['1110', '1000', '0010', '0101']: 4 ['F', 'LUV', 'SNA', 'WYN']
------>['110', '101', '000', '011']: 0 []
--------->['1101', '1010', '0001', '0110']: 2 ['GNW', 'HON']
--------->['1101', '1011', '0001', '0110']: 2 ['PH', 'TGT']
--------->['1100', '1011', '0001', '0111']: 1 ['VFC']
--------->['1100', '1010', '0001', '0111']: 1 ['XRX']
--------->['1101', '1010', '0001', '0111']: 1 ['HD']
------>['111', '100', '001', '011']: 3 ['CAG', 'PCP', 'SWK']
------>['111', '101', '000', '010']: 2 ['BA', 'ROK']
------>['110', '101', '001', '010']: 0 []
--------->['1101', '1010', '0010', '0101']: 1 ['UPS']
--------->['1101', '1011', '0010', '0101']: 2 ['LEG', 'PEP']
--------->['1101', '1011', '0010', '0100']: 2 ['DRI', 'PPG']
--------->['1101', '1010', '0011', '0100']: 1 ['CSX']
--------->['1101', '1010', '0010', '0100']: 1 ['KFT']
------>['111', '101', '001', '010']: 0 []
--------->['1110', '1010', '0010', '0100']: 5 ['DIS', 'ETN', 'IFF', 'NSC', 'WHR']
--------->['1110', '1011', '0010', '0100']: 1 ['CMA']
------>['110', '100', '001', '011']: 4 ['FTI', 'PLL', 'TER', 'TMO']
------>['111', '100', '000', '011']: 2 ['AMP', 'NOC']
------>['110', '100', '001', '010']: 3 ['AGN', 'COP', 'ICE']
------>['110', '101', '000', '010']: 1 ['KIM']
--->['10', '10', '00', '10']: 0 []
------>['101', '100', '001', '100']: 2 ['BHI', 'OXY']
------>['101', '101', '000', '101']: 2 ['ANF', 'LNC']
------>['101', '101', '000', '100']: 3 ['BBT', 'LEN', 'RDC']
------>['101', '100', '001', '101']: 2 ['TRV', 'XL']
------>['101', '101', '001', '100']: 1 ['HOG']
------>['101', '100', '000', '101']: 3 ['EK', 'GE', 'OMC']
------>['100', '101', '000', '101']: 1 ['WFC']
--->['11', '01', '00', '10']: 0 []
------>['110', '011', '001', '100']: 3 ['AFL', 'EMN', 'RL']
------>['110', '010', '001', '101']: 2 ['DOW', 'JBL']
------>['110', '011', '001', '101']: 1 ['FDX']
------>['110', '010', '000', '101']: 2 ['RTN', 'WMB']
------>['111', '010', '001', '100']: 1 ['GLW']
------>['111', '011', '000', '100']: 2 ['SE', 'SYK']
------>['110', '010', '001', '100']: 1 ['CL']
------>['110', '011', '000', '101']: 3 ['BCR', 'EFX', 'X']
------>['110', '011', '000', '100']: 1 ['TIF']
--->['00', '10', '01', '11']: 0 []
------>['001', '101', '011', '110']: 1 ['MUR']
------>['001', '100', '011', '110']: 1 ['IGT']
------>['000', '100', '011', '110']: 1 ['TSO']
------>['001', '100', '011', '111']: 2 ['STT', 'WFR']
------>['001', '100', '010', '110']: 1 ['JPM']
------>['001', '101', '010', '110']: 2 ['JCP', 'OI']
------>['001', '100', '010', '111']: 1 ['KSS']
--->['00', '11', '01', '10']: 1 ['C']
--->['11', '10', '00', '10']: 0 []
------>['110', '100', '000', '100']: 3 ['A', 'JCI', 'PKI']
------>['110', '101', '000', '101']: 1 ['LMT']
------>['110', '101', '000', '100']: 2 ['CCL', 'FO']
------>['110', '100', '000', '101']: 1 ['USB']
--->['10', '01', '00', '11']: 0 []
------>['100', '011', '001', '110']: 3 ['BBY', 'CAM', 'ITW']
------>['100', '010', '001', '110']: 2 ['MCO', 'WY']
------>['101', '011', '000', '110']: 4 ['DV', 'GD', 'HAR', 'RHI']
------>['101', '010', '001', '110']: 4 ['APA', 'HES', 'HPQ', 'NBL']
------>['100', '011', '001', '111']: 1 ['MAS']
------>['100', '010', '001', '111']: 1 ['LSI']
--->['11', '01', '00', '01']: 0 []
------>['111', '010', '001', '011']: 4 ['AMD', 'BMS', 'HRS', 'PG']
------>['111', '011', '001', '010']: 3 ['AXP', 'GAS', 'NI']
------>['111', '011', '001', '011']: 4 ['GCI', 'MMM', 'SLM', 'WM']
------>['110', '011', '001', '011']: 1 ['APH']
--->['00', '01', '10', '11']: 0 []
------>['000', '010', '101', '111']: 2 ['AES', 'BSX']
------>['001', '010', '100', '111']: 1 ['XOM']
------>['001', '010', '101', '110']: 2 ['APD', 'WU']
------>['001', '010', '101', '111']: 1 ['EXC']
------>['000', '011', '100', '111']: 1 ['GT']
--->['00', '01', '01', '11']: 0 []
------>['001', '011', '011', '111']: 5 ['AVP', 'GS', 'IVZ', 'MFE', 'MS']
------>['001', '010', '011', '111']: 1 ['FII']
--->['11', '00', '01', '10']: 4 ['CI', 'MRK', 'NYT', 'WLP']
--->['11', '01', '01', '10']: 3 ['BTU', 'K', 'LLY']
--->['10', '01', '01', '11']: 4 ['FCX', 'HAL', 'NSM', 'SLB']
--->['11', '11', '01', '00']: 3 ['AIZ', 'EP', 'YUM']
--->['10', '00', '01', '11']: 0 []
------>['100', '001', '010', '111']: 2 ['DGX', 'WDC']
------>['100', '001', '011', '110']: 1 ['EIX']
------>['101', '001', '010', '110']: 3 ['ABT', 'MHS', 'MWV']
--->['00', '11', '00', '10']: 1 ['MMC']
--->['01', '10', '00', '11']: 0 []
------>['011', '100', '001', '110']: 2 ['DHI', 'HIG']
------>['010', '100', '001', '111']: 3 ['ITT', 'LUK', 'SVU']
------>['010', '101', '001', '110']: 2 ['BAC', 'CVS']
------>['010', '100', '001', '110']: 1 ['PHM']
------>['011', '101', '000', '110']: 1 ['GPS']
------>['011', '100', '000', '111']: 1 ['BK']
--->['01', '10', '01', '10']: 2 ['DFS', 'PBI']
--->['10', '10', '00', '11']: 3 ['ALL', 'FLS', 'SWY']
--->['01', '01', '00', '11']: 5 ['BEN', 'DHR', 'FHN', 'GIS', 'THC']
--->['10', '11', '00', '01']: 0 []
------>['101', '110', '000', '011']: 2 ['M', 'MET']
------>['101', '110', '001', '011']: 3 ['DNR', 'MI', 'UNM']
------>['100', '110', '001', '010']: 1 ['TWX']
--->['11', '00', '00', '11']: 3 ['BDX', 'MDT', 'MHP']
--->['01', '11', '10', '01']: 1 ['MRO']
--->['10', '00', '10', '11']: 2 ['FRX', 'NBR']
--->['11', '01', '10', '00']: 1 ['PGN']
--->['00', '00', '10', '11']: 2 ['DO', 'MON']
--->['01', '01', '10', '11']: 3 ['CHK', 'T', 'TSS']
--->['11', '00', '01', '01']: 2 ['AEP', 'DUK']
--->['11', '01', '01', '00']: 4 ['AMT', 'HUM', 'RAI', 'TDC']
--->['11', '01', '00', '11']: 2 ['LLL', 'MEE']
--->['10', '00', '00', '11']: 5 ['CVH', 'JNJ', 'MA', 'PLD', 'WMT']
--->['00', '10', '01', '10']: 1 ['FLR']
--->['10', '10', '10', '00']: 2 ['AN', 'S']
--->['01', '00', '10', '11']: 0 []
------>['011', '001', '101', '110']: 2 ['SRE', 'SWN']
------>['010', '001', '101', '110']: 2 ['ADM', 'PPL']
------>['011', '001', '100', '110']: 3 ['DNB', 'DVN', 'VZ']
------>['011', '000', '100', '111']: 1 ['HRB']
--->['01', '01', '01', '11']: 0 []
------>['010', '011', '010', '111']: 1 ['JNS']
------>['011', '010', '010', '111']: 1 ['EQT']
------>['010', '011', '010', '110']: 1 ['NUE']
------>['010', '010', '010', '111']: 1 ['CA']
------>['010', '010', '011', '111']: 2 ['RRC', 'WAG']
--->['11', '11', '00', '01']: 3 ['IPG', 'PRU', 'SII']
--->['01', '10', '01', '11']: 2 ['KR', 'VMC']
--->['01', '10', '00', '10']: 2 ['VLO', 'WPO']
--->['10', '01', '01', '10']: 4 ['CVX', 'IP', 'L', 'TXN']
--->['11', '11', '00', '00']: 0 []
------>['110', '110', '001', '001']: 0 []
--------->['1101', '1100', '0011', '0010']: 2 ['FDO', 'PGR']
--------->['1100', '1100', '0011', '0011']: 1 ['DD']
--------->['1101', '1101', '0010', '0010']: 1 ['CCE']
--------->['1101', '1100', '0011', '0011']: 2 ['KEY', 'SHW']
--------->['1101', '1100', '0010', '0011']: 1 ['TJX']
--->['01', '11', '01', '10']: 1 ['LM']
--->['10', '11', '01', '01']: 1 ['NWL']
--->['00', '01', '11', '10']: 1 ['MOT']
--->['01', '11', '10', '00']: 1 ['PCS']
--->['00', '10', '10', '10']: 2 ['AYE', 'TAP']
--->['00', '11', '10', '01']: 1 ['PWR']
--->['10', '00', '10', '10']: 2 ['AEE', 'KO']
--->['11', '00', '00', '10']: 1 ['ZMH']
--->['10', '01', '10', '01']: 2 ['KMB', 'PEG']
--->['10', '11', '01', '00']: 3 ['FIS', 'HSY', 'SUN']
--->['00', '10', '10', '11']: 1 ['GME']
--->['01', '10', '01', '01']: 1 ['R']
--->['00', '11', '00', '11']: 1 ['JEC']
--->['00', '11', '01', '11']: 1 ['IRM']
--->['10', '01', '01', '01']: 1 ['HCN']
--->['10', '10', '01', '01']: 2 ['PX', 'STZ']
--->['01', '01', '01', '10']: 1 ['AVY']
--->['10', '01', '00', '10']: 2 ['AET', 'STJ']
--->['01', '11', '00', '10']: 1 ['PFG']
--->['01', '11', '00', '01']: 1 ['NYX']
--->['00', '00', '11', '11']: 1 ['FE']
--->['10', '11', '10', '00']: 1 ['PTV']
--->['00', '11', '01', '01']: 1 ['AIG']

Don't forget to close the server!


In [32]:
# terminate processes before exiting
os.kill(server.pid, signal.SIGINT)
time.sleep(5)  # give it time to terminate
server.poll()  # non-blocking: collects the exit status if it has terminated
web_interface = None

# terminate() only sends the signal; wait for the webserver to actually exit
# so that no child process is left behind when the notebook finishes
webserver.terminate()
try:
    webserver.wait(timeout=5)
except subprocess.TimeoutExpired:
    # it did not exit in time: force it, then collect the exit status
    webserver.kill()
    webserver.wait()

Any questions?


In [ ]: