In [1]:
## Set up the path for our codebase
import sys
sys.path.append( '../code/' )
In [2]:
## import our time_series codebase
import time_series.generated_datasets
import time_series.result_set
import time_series.algorithm
In [3]:
dataset_0 = time_series.generated_datasets.DSS[0]( 10 )
In [6]:
dataset_0.taxonomy
Out[6]:
In [7]:
%matplotlib inline
import matplotlib.pyplot as plt
In [8]:
time_series.generated_datasets.plot_dataset( dataset_0 )
In [9]:
## import numpy for polyfit
import numpy as np
In [10]:
##
# Define a new algorithm for a polynomial least-squares fit
class PolyFitAlg( time_series.algorithm.AlertAlgorithm ):
    def __init__( self, order ):
        self.order = order
        time_series.algorithm.AlertAlgorithm.__init__( self, "Polyfit[{0}]".format(order) )
    def __call__( self, target, history ):
        # fit the polynomial to the history
        n = len(history)
        poly = np.poly1d( np.polyfit( xrange(n), history, self.order ) )
        # extrapolate one step past the history and compare with the target
        expected = poly(n)
        difference = abs(target - expected)
        if target != 0:
            fraction = difference / abs(target)
        else:
            # assume the target is actually 1, so use the absolute difference instead of a fraction
            fraction = difference
        result = {
            'target'     : target,
            'expected'   : expected,
            'order'      : self.order,
            'difference' : difference,
            'fraction'   : fraction,
            'poly'       : poly,
        }
        return fraction, result
In [11]:
alg_pf = PolyFitAlg( 4 )
frac, res = alg_pf( 10.0, xrange(10) )
In [12]:
res
Out[12]:
In [13]:
plt.plot( xrange(13), res['poly']( xrange(13) ))
Out[13]:
In [14]:
frac,res = alg_pf( dataset_0.time_series[-1], dataset_0.time_series[:-1] )
res
Out[14]:
In [15]:
n = len(dataset_0.time_series)
plt.plot( xrange(n), res['poly']( xrange(n)) )
plt.hold( True )
plt.plot( xrange(n-1), dataset_0.time_series[:-1], 'r.' )
Out[15]:
In [16]:
dataset_0.taxonomy
Out[16]:
In [17]:
n = len(dataset_0.time_series)
plt.plot( xrange(n), res['poly']( xrange(n)) )
plt.hold( True )
plt.plot( xrange(n), dataset_0.time_series, 'r.' )
Out[17]:
In [18]:
alg_pf8 = PolyFitAlg( 8 )
frac8,res8 = alg_pf8( dataset_0.time_series[-1], dataset_0.time_series[:-1] )
n = len(dataset_0.time_series)
plt.figure()
plt.plot( xrange(n-1), res8['poly']( xrange(n-1)) )
plt.hold( True )
plt.plot( xrange(n-1), dataset_0.time_series[:-1], 'r.', ms=10 )
plt.figure()
plt.plot( xrange(n + 2), res8['poly']( xrange(n + 2)) )
plt.hold( True )
plt.plot( xrange(n), dataset_0.time_series, 'r.', ms=10 )
Out[18]:
In [19]:
## compute the mean squared error for a history and a PolyFit algorithm
def polyfit_mse( alg, history ):
    # first fit the algorithm with a dummy target
    frac, res = alg( history[-1], history )
    # ok, grab the polynomial from the fit and compute the errors
    poly = res['poly']
    x = xrange(len(history))
    errors = np.array(history) - poly(x)
    # compute the mean squared error
    mse = np.mean( errors ** 2 )
    return mse
In [20]:
mse_pf4 = polyfit_mse( alg_pf, dataset_0.time_series[:-1] )
mse_pf8 = polyfit_mse( alg_pf8, dataset_0.time_series[:-1] )
print "order 4 MSE: {0}".format( mse_pf4 )
print "order 8 MSE: {0}".format( mse_pf8 )
In [21]:
run_spec_pf4 = time_series.result_set.RunSpec( time_series.generated_datasets.DSS[0], alg_pf)
run_spec_pf8 = time_series.result_set.RunSpec( time_series.generated_datasets.DSS[0], alg_pf8)
rset_pf4 = run_spec_pf4.collect_results( 20, 5, 9 )
rset_pf8 = run_spec_pf8.collect_results( 20, 5, 9 )
stats_pf4 = time_series.result_set.compute_classifier_stats( rset_pf4, 0.5 )
stats_pf8 = time_series.result_set.compute_classifier_stats( rset_pf8, 0.5 )
print "order 4 stats: {0}".format( stats_pf4 )
print "order 8 stats: {0}".format( stats_pf8 )
So what now? This is where we bring in methods for preventing overfitting: score each candidate model not just by its fit error, but by its fit error plus a penalty on its complexity, along the lines of:
In [ ]:
aic( alg ) = MSE( alg ) + log(N) * COMPLEXITY
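A minimal sketch of that idea, assuming we treat the polynomial order as the complexity term and reuse polyfit_mse for the fit term; the name penalized_score and the log(N) penalty weight are illustrative here, not part of the time_series codebase:
In [ ]:
## Sketch: rank candidate algorithms by fit error plus a complexity penalty
def penalized_score( alg, history ):
    mse = polyfit_mse( alg, history )
    return mse + np.log( len(history) ) * alg.order

for alg in ( alg_pf, alg_pf8 ):
    score = penalized_score( alg, dataset_0.time_series[:-1] )
    print "order {0}: penalized score = {1}".format( alg.order, score )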