In [1]:
import os, sys
# Make the project's sources under ../../main/python importable from this
# notebook. The relative path is resolved against the kernel's current
# working directory at the time this cell runs.
# The membership check keeps re-runs of this cell from piling up duplicate
# sys.path entries.
MAIN_PYTHON_DIR = os.path.abspath('../../main/python')
if MAIN_PYTHON_DIR not in sys.path:
    sys.path.append(MAIN_PYTHON_DIR)
In [2]:
import datetime as dt
import numpy as np
import pandas as pd
import thalesians.tsa.signatures as signatures
import importlib
# Re-execute the already-imported module; presumably so that edits to its
# source are picked up without restarting the kernel (confirm intent).
# As the last expression of the cell, the returned module object is what the
# notebook displays as this cell's output.
importlib.reload(signatures)
Out[2]:
This is the data from the example in [CK16], Section 2.1.1, equations (2.1)-(2.3):
In [3]:
# Four rows of two-column example data, one row per line, with the columns
# labelled 'A' and 'B'.
df = pd.DataFrame(
    data=np.array([
        [1., 1.],
        [3., 4.],
        [5., 2.],
        [8., 6.],
    ]),
    columns=['A', 'B'],
)
In [4]:
# Bare expression: renders the frame currently bound to df as the cell output.
df
Out[4]:
This reproduces equation (2.11) in [CK16]:
In [5]:
# Call signature() on the two-column frame with no optional arguments, i.e.
# with whatever defaults the library defines. The surrounding notebook text
# states that the result reproduces equation (2.11) in [CK16].
signatures.signature(df)
Out[5]:
In [6]:
# Single-column frame 'A' with five strictly increasing values, built from a
# flat array reshaped into one column.
df = pd.DataFrame(
    np.array([1., 3., 5., 9., 10.]).reshape(-1, 1),
    columns=['A'],
)
In [7]:
# Signature of the single-column frame defined in the previous cell.
# NOTE(review): max_word_length=5 presumably caps the length of the words
# (signature terms) returned -- confirm against thalesians.tsa.signatures.
signatures.signature(df, max_word_length=5)
Out[7]:
In [8]:
# Single-column frame 'A' with five values that rise, fall below zero and
# rise again, built from a flat array reshaped into one column.
df = pd.DataFrame(
    np.array([1., 3., 2., -9., 10.]).reshape(-1, 1),
    columns=['A'],
)
In [9]:
# Same call as for the increasing series, now on the non-monotonic
# single-column frame defined in the previous cell.
# NOTE(review): max_word_length=5 presumably caps the length of the words
# (signature terms) returned -- confirm against thalesians.tsa.signatures.
signatures.signature(df, max_word_length=5)
Out[9]:
In [10]:
# Eight-column frame of 25 dated observations (4 Jan 2016 to 8 Feb 2016),
# one row per date and one column per ticker.
# NOTE(review): the values look like daily prices for the named tickers; the
# data source and field are not recorded in this notebook -- confirm.
df = pd.DataFrame([
# Each row below lists the values in the same order as `columns`.
[63.62, 41.48, 177.14, 102.22, 105.35, 759.44, 33.31, 13.97],
[63.73, 41.68, 174.09, 102.73, 102.71, 761.53, 32.43, 13.72],
[62.81, 41.05, 169.84, 102.97, 100.70, 759.33, 31.27, 13.11],
[60.27, 40.27, 164.62, 97.92, 96.45, 741.00, 29.99, 12.70],
[58.92, 39.70, 163.94, 97.33, 96.96, 730.91, 29.53, 12.54],
[58.83, 39.86, 165.73, 97.51, 98.53, 733.07, 30.25, 12.77],
[58.96, 40.62, 165.71, 99.37, 99.96, 745.34, 30.30, 12.85],
[57.34, 39.01, 158.99, 95.44, 97.39, 719.57, 30.49, 12.20],
[58.20, 39.93, 161.39, 98.37, 99.52, 731.39, 30.30, 12.19],
[57.04, 39.05, 155.61, 94.97, 97.13, 710.49, 29.57, 11.97],
[57.01, 39.13, 156.82, 95.26, 96.66, 719.08, 29.40, 11.95],
[55.51, 38.87, 153.75, 94.35, 96.79, 718.56, 29.42, 11.90],
[55.25, 38.52, 151.65, 94.16, 96.30, 726.67, 29.55, 12.01],
[56.95, 39.37, 156.86, 97.94, 101.42, 745.46, 29.28, 12.14],
[55.66, 38.29, 151.12, 97.01, 99.44, 733.62, 29.23, 11.98],
[57.08, 39.02, 154.45, 97.34, 99.99, 733.79, 29.65, 12.26],
[57.04, 39.18, 153.72, 94.45, 93.42, 717.58, 29.21, 11.85],
[57.28, 39.22, 157.06, 109.11, 94.09, 748.30, 29.02, 11.71],
[59.50, 40.06, 161.56, 112.21, 97.34, 761.35, 29.64, 11.94],
[58.86, 39.80, 159.65, 115.09, 96.43, 770.77, 30.11, 12.07],
[57.03, 38.75, 151.70, 114.61, 94.48, 780.91, 29.65, 11.51],
[57.41, 39.28, 152.68, 112.69, 96.35, 749.38, 28.92, 11.46],
[58.40, 40.11, 156.49, 110.49, 96.60, 730.03, 28.64, 11.53],
[57.75, 40.09, 156.47, 104.07, 94.02, 703.76, 28.54, 11.45],
[56.54, 39.53, 149.25, 99.75, 95.01, 704.16, 28.68, 11.59]],
# Row labels: one datetime per data row, in the same order (weekdays only;
# 18 Jan 2016 is also absent).
index=[
dt.datetime(2016, 1, 4),
dt.datetime(2016, 1, 5),
dt.datetime(2016, 1, 6),
dt.datetime(2016, 1, 7),
dt.datetime(2016, 1, 8),
dt.datetime(2016, 1, 11),
dt.datetime(2016, 1, 12),
dt.datetime(2016, 1, 13),
dt.datetime(2016, 1, 14),
dt.datetime(2016, 1, 15),
dt.datetime(2016, 1, 19),
dt.datetime(2016, 1, 20),
dt.datetime(2016, 1, 21),
dt.datetime(2016, 1, 22),
dt.datetime(2016, 1, 25),
dt.datetime(2016, 1, 26),
dt.datetime(2016, 1, 27),
dt.datetime(2016, 1, 28),
dt.datetime(2016, 1, 29),
dt.datetime(2016, 2, 1),
dt.datetime(2016, 2, 2),
dt.datetime(2016, 2, 3),
dt.datetime(2016, 2, 4),
dt.datetime(2016, 2, 5),
dt.datetime(2016, 2, 8)],
# Column labels, in the order the values appear within each data row.
columns=[
'JPM UN Equity',
'USB UN Equity',
'GS UN Equity',
'FB UW Equity',
'AAPL UW Equity',
'GOOGL UW Equity',
'GM UN Equity',
'F UN Equity'])
In [11]:
# Bare expression: renders the frame currently bound to df as the cell output.
df
Out[11]:
In [12]:
# Signature over all columns of df at once.
# NOTE(review): max_word_length=2 presumably caps the length of the words
# (signature terms) returned -- confirm against thalesians.tsa.signatures.
signatures.signature(df, max_word_length=2)
Out[12]:
In [13]:
# Same call restricted to a two-column sub-frame (the JPM and USB columns).
# NOTE(review): max_word_length=2 presumably caps the length of the words
# (signature terms) returned -- confirm against thalesians.tsa.signatures.
signatures.signature(df[['JPM UN Equity','USB UN Equity']], max_word_length=2)
Out[13]:
In [14]:
# Signature of a one-column sub-frame (the JPM column only). The double
# brackets keep the selection a DataFrame rather than a Series. No optional
# arguments are passed, so the library's defaults apply.
signatures.signature(df[['JPM UN Equity']])
Out[14]:
[CK16] Ilya Chevyrev and Andrey Kormilitzin. A Primer on the Signature Method in Machine Learning. arXiv:1603.03788v1 [stat.ML] 11 Mar 2016, https://arxiv.org/pdf/1603.03788.pdf