In [1]:
import sys  # for gioia to load aiohttp

# NOTE(review): hardcoded absolute path is machine-specific; prefer activating
# the conda env or an environment variable. Guard against duplicate entries so
# re-running this cell does not keep growing sys.path.
_extra_site = '/Users/maggiori/anaconda/envs/py35/lib/python3.5/site-packages'
if _extra_site not in sys.path:
    sys.path.append(_extra_site)
In [2]:
# to import modules locally without having installed the entire package
# http://stackoverflow.com/questions/714063/importing-modules-from-parent-folder
# to import modules locally without having installed the entire package
# http://stackoverflow.com/questions/714063/importing-modules-from-parent-folder
import os
import sys
import inspect

# locate the directory containing this notebook, then put its parent
# (the package root) at the front of the module search path
_this_file = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(_this_file))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
In [3]:
import signal
import time
import subprocess
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook')
In [4]:
# database parameters
ts_length = 100                                # number of samples per time series
data_dir = '../db_files'                       # root folder for database files
db_name = 'default'                            # name of the demo database
# full directory path; later cells rely on the trailing slash for concatenation
dir_path = '{}/{}/'.format(data_dir, db_name)
In [5]:
# clear file system for testing: make sure the database directory exists,
# then delete any files left over from previous runs
# exist_ok avoids the check-then-create race of the original exists()/makedirs()
os.makedirs(dir_path, exist_ok=True)
for entry in os.listdir(dir_path):
    full_path = dir_path + entry
    # only remove regular files; os.remove would raise on a subdirectory
    if os.path.isfile(full_path):
        os.remove(full_path)
In [6]:
# when running from the terminal:
# python go_server_persistent.py --ts_length 100 --db_name 'demo'
# here we load the server as a subprocess for demonstration purposes
server_args = ['python', '../go_server_persistent.py',
               '--ts_length', str(ts_length),
               '--data_dir', data_dir,
               '--db_name', db_name]
server = subprocess.Popen(server_args)
time.sleep(5)  # make sure it loads completely
In [7]:
# when running from the terminal:
# python go_webserver.py
# here we load the server as a subprocess for demonstration purposes
webserver_args = ['python', '../go_webserver.py']
webserver = subprocess.Popen(webserver_args)
time.sleep(5)  # make sure it loads completely
In [8]:
# import only the name we actually use rather than `import *`,
# keeping the notebook namespace explicit
from webserver import WebInterface
In [9]:
# open a client connection to the REST web server started above
web_interface = WebInterface()
Let's create some dummy data to aid in our demonstration. You will need to import the timeseries package to work with the TimeSeries format.
Note: the database is persistent, so it can store data between sessions, but we will start with an empty database here for demonstration purposes.
In [10]:
from timeseries import *
In [11]:
def tsmaker(m, s, j):
    '''
    Helper function: randomly generates a time series for testing.

    Parameters
    ----------
    m : float
        Mean value for generating time series data
    s : float
        Standard deviation value for generating time series data
    j : float
        Quantifies the "jitter" to add to the time series data

    Returns
    -------
    tuple of (dict, TimeSeries)
        Randomly generated metadata and the associated time series
        of length `ts_length` (module-level constant).
    '''
    # generate metadata
    meta = {}
    meta['order'] = int(np.random.choice(
        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]))
    meta['blarg'] = int(np.random.choice([1, 2]))
    # generate time series data
    # BUG FIX: the time grid was hard-coded to 100 points (arange with step
    # 0.01), which breaks whenever ts_length != 100 because the jitter term
    # below draws ts_length samples; derive the grid from ts_length instead.
    t = np.linspace(0.0, 1.0, ts_length, endpoint=False)
    v = norm.pdf(t, m, s) + j * np.random.randn(ts_length)
    # return time series and metadata
    return meta, TimeSeries(t, v)
In [12]:
# generate sample time series parameters: means, standard deviations, jitters
num_ts = 50
mus = np.random.uniform(low=0.0, high=1.0, size=num_ts)
sigs = np.random.uniform(low=0.05, high=0.4, size=num_ts)
jits = np.random.uniform(low=0.05, high=0.2, size=num_ts)

# initialize dictionaries for time series and their metadata
primary_keys = []
tsdict = {}
metadict = {}

# fill dictionaries with randomly generated entries for database
for idx, (m, s, j) in enumerate(zip(mus, sigs, jits)):
    meta, tsrs = tsmaker(m, s, j)   # generate data
    pk = "ts-{}".format(idx)        # generate primary key
    primary_keys.append(pk)         # keep track of all primary keys
    tsdict[pk] = tsrs               # store time series data
    metadict[pk] = meta             # store metadata

# sorted key list to assist with later testing
ts_keys = sorted(tsdict.keys())

# randomly choose time series as vantage points and derive the names
# of their distance fields
num_vps = 5
vpkeys = list(np.random.choice(ts_keys, size=num_vps, replace=False))
vpdist = ['d_vp_{}'.format(key) for key in vpkeys]
In [13]:
# check that the database is empty (no entries should be returned yet)
web_interface.select()
Out[13]:
In [14]:
# add stats trigger: computes the 'mean' and 'std' fields whenever
# a new time series is inserted
web_interface.add_trigger('stats', 'insert_ts', ['mean', 'std'], None)
Out[14]:
In [15]:
# insert the generated time series into the database
for pk in tsdict:
    web_interface.insert_ts(pk, tsdict[pk])
In [16]:
# upsert the metadata for each stored time series
for pk in tsdict:
    web_interface.upsert_meta(pk, metadict[pk])
In [17]:
# register each chosen vantage point with the database
for vp in vpkeys:
    web_interface.insert_vp(vp)
In [18]:
# select all database entries, requesting all metadata fields
results = web_interface.select(fields=[])

# the database must contain exactly the entries we inserted
assert len(results) == num_ts
# every primary key must be present
assert sorted(results.keys()) == ts_keys
In [19]:
# check that every stored time series and its metadata round-trip unchanged
for pk in tsdict:
    # the stored time series must equal the local copy
    results = web_interface.select(fields=['ts'], md={'pk': pk})
    assert results[pk]['ts'] == tsdict[pk]
    # each metadata field must match what we upserted
    results = web_interface.select(fields=[], md={'pk': pk})
    for field in metadict[pk]:
        assert metadict[pk][field] == results[pk][field]
In [20]:
# check that the vantage points stored in the database match those selected locally
print('Vantage points selected:', vpkeys)
print('Vantage points in database:',
web_interface.select(fields=None, md={'vp': True}, additional={'sort_by': '+pk'}).keys())
In [21]:
# check that a distance field (d_vp_<pk>) has been created for each vantage point
print('Vantage point distance fields:', vpdist)
web_interface.select(fields=vpdist, additional={'sort_by': '+pk', 'limit': 1})
Out[21]:
In [22]:
# check that the stats trigger has executed as expected
# (round to 4 decimal places to allow for floating-point error)
for pk in tsdict:
    results = web_interface.select(fields=['mean', 'std'], md={'pk': pk})
    assert np.round(results[pk]['mean'], 4) == np.round(tsdict[pk].mean(), 4)
    assert np.round(results[pk]['std'], 4) == np.round(tsdict[pk].std(), 4)
Let's generate an additional time series for similarity searches. We'll store the time series and the results of the similarity searches, so that we can compare against them after reloading the database.
In [23]:
# draw random mean, stdev, and jitter parameters and generate a fresh
# query series (metadata is discarded; only the series itself is needed)
_, query = tsmaker(np.random.uniform(low=0.0, high=1.0),
np.random.uniform(low=0.05, high=0.4),
np.random.uniform(low=0.05, high=0.2))
In [24]:
# nearest-neighbor lookup via vantage-point search (1 closest series);
# saved so we can compare against the result after the database reloads
results_vp = web_interface.vp_similarity_search(query, 1)
results_vp
Out[24]:
In [25]:
# nearest-neighbor lookup via iSAX search; saved so we can compare
# against the result after the database reloads
results_isax = web_interface.isax_similarity_search(query)
results_isax
Out[25]:
Finally, let's store our iSAX tree representation.
In [26]:
# store the text representation of the iSAX tree for later comparison
results_tree = web_interface.isax_tree()
print(results_tree)
Now that we know that everything is loaded, let's close the database and re-open it.
In [27]:
# shut down both servers gracefully; SIGINT lets the persistent database
# flush its state to disk before exiting
server.send_signal(signal.SIGINT)
server.wait(timeout=30)      # block until it actually exits (was a fixed 5s sleep)
webserver.send_signal(signal.SIGINT)
webserver.wait(timeout=30)   # reap the process to avoid leaving a zombie
# drop the stale client; the server it pointed to is gone
web_interface = None
In [28]:
# relaunch the database server and the web server, then reconnect the client
server = subprocess.Popen(['python', '../go_server_persistent.py',
                           '--ts_length', str(ts_length),
                           '--data_dir', data_dir,
                           '--db_name', db_name])
time.sleep(5)  # give it time to load fully
webserver = subprocess.Popen(['python', '../go_webserver.py'])
time.sleep(5)  # give it time to load fully
web_interface = WebInterface()
In [29]:
# select all database entries (all metadata fields) after the reload
results = web_interface.select(fields=[])

# the reloaded database must still contain every entry we inserted
assert len(results) == num_ts
assert sorted(results.keys()) == ts_keys
In [30]:
# check that all time series and metadata survived the reload
for pk in tsdict:
    # stored series must still equal the local copy
    results = web_interface.select(fields=['ts'], md={'pk': pk})
    assert results[pk]['ts'] == tsdict[pk]
    # each metadata field must still match
    results = web_interface.select(fields=[], md={'pk': pk})
    for field in metadict[pk]:
        assert metadict[pk][field] == results[pk][field]
In [31]:
# check that the vantage points survived the reload
print('Vantage points selected:', vpkeys)
print('Vantage points in database:',
web_interface.select(fields=None, md={'vp': True}, additional={'sort_by': '+pk'}).keys())
In [32]:
# check that the isax tree has fully reloaded from disk
# (compare visually against the tree printed before shutdown)
print(web_interface.isax_tree())
In [33]:
# compare vantage point search results before/after reload (expect True)
results_vp == web_interface.vp_similarity_search(query, 1)
Out[33]:
In [34]:
# compare isax search results before/after reload (expect True)
results_isax == web_interface.isax_similarity_search(query)
Out[34]:
In [35]:
# check that the trigger survived the reload by inserting new data
# create a throwaway test time series with random parameters
_, test = tsmaker(np.random.uniform(low=0.0, high=1.0),
np.random.uniform(low=0.05, high=0.4),
np.random.uniform(low=0.05, high=0.2))
# insert test time series (should fire the 'stats' trigger)
web_interface.insert_ts('test', test)
# check that mean and standard deviation have been calculated by the trigger
print(web_interface.select(fields=['mean', 'std'], md={'pk': 'test'}))
# remove test time series; trailing ';' suppresses the cell output
web_interface.delete_ts('test');
We have successfully reloaded all of the database components from disk!
In [36]:
# terminate processes before exiting
os.kill(server.pid, signal.SIGINT)  # SIGINT → graceful shutdown so the DB flushes to disk
time.sleep(5)  # give it time to terminate
web_interface = None  # drop the now-dead client connection
webserver.terminate()
# reap the web server so it does not linger as a zombie process
webserver.wait(timeout=30)