In [13]:
import os, shutil, math
import blaze as blz
from blaze.ts.ucr_dtw import ucr
import numpy as np
In [14]:
NROWS = int(1e6)
CS = 128
NPERIODS = NROWS // CS
In [15]:
# Create a large time series dataset by appending chunks
if os.path.exists('ts.blz'): shutil.rmtree('ts.blz')
ts = blz.array([], 'x, float64', params=blz.params(storage='ts.blz'))
for i in range(NPERIODS):
# Proceed to fill the empty array in chunks
x = np.linspace(i, i+1, CS)*math.pi
ts.append(x*np.sin(x))
ts.commit()
In [16]:
# Create a first dataset to query
xq = np.linspace(3, 4, CS)*math.pi
query = xq*np.sin(xq)
In [17]:
# Do the search for the exact pattern
%time loc, dist = ucr.dtw(ts, query, 0.1, query.size, verbose=True)
print "Location : ", loc
print "Distance : ", dist
print "Data Scanned : ", ts.size
In [18]:
# Plot the results
win = 500
sl = slice(max(0, loc-win), min(ts.size, loc+query.size+win))
x = np.arange(sl.start, sl.stop)
slp = slice(loc, loc+query.size)
xp = np.arange(slp.start, slp.stop)
plot(x, ts[sl], 'b-', xp, query[:], 'r+')
Out[18]:
In [19]:
# Create a second dataset to query (introducing noise)
n = 0 # no displacement
noise = np.random.randn(query.size)*.1 # introduce some noise
query2 = xq*np.sin(xq)+noise+n
In [20]:
# Do the search for the noisy pattern
%time loc, dist = ucr.dtw(ts, query2, 0.1, query2.size, verbose=True)
print "Location : ", loc
print "Distance : ", dist
print "Data Scanned : ", ts.size
In [21]:
# Plot the results
win = 500
sl = slice(max(0, loc-win), min(ts.size, loc+query2.size+win))
x = np.arange(sl.start, sl.stop)
slp = slice(loc, loc+query2.size)
xp = np.arange(slp.start, slp.stop)
plot(x, ts[sl], 'b-', xp, query2[:], 'r+')
Out[21]:
In [22]:
# Create a third dataset to query (noise + displacement)
n = 5 # introduce some displacement
noise = np.random.randn(query.size)*.1 # introduce some noise
query3 = xq*np.sin(xq)+noise+n
In [23]:
# Do the search for the noisy pattern
%time loc, dist = ucr.dtw(ts, query3, 0.1, query3.size, verbose=True)
print "Location : ", loc
print "Distance : ", dist
print "Data Scanned : ", ts.size
In [24]:
# Plot the results
win = 500
sl = slice(max(0, loc-win), min(ts.size, loc+query3.size+win))
x = np.arange(sl.start, sl.stop)
slp = slice(loc, loc+query3.size)
xp = np.arange(slp.start, slp.stop)
plot(x, ts[sl], 'b-', xp, query3[:], 'r+')
Out[24]:
In [ ]: