Time series signatures


In [1]:
import os
import sys

# Make the project's Python sources importable from this notebook.
# The relative path is resolved against the kernel's current working
# directory, so the notebook must be started from a folder for which
# '../../main/python' points at those sources.
SRC_DIR = os.path.abspath('../../main/python')

# Guard against piling up duplicate entries when this cell is re-run.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [2]:
# Standard library
import datetime as dt
import importlib

# Third-party
import numpy as np
import pandas as pd

# Project module demonstrated in this notebook
import thalesians.tsa.signatures as signatures

# Re-execute the module so that edits made to it since the kernel first
# imported it are picked up; the reloaded module is this cell's output.
importlib.reload(signatures)


Out[2]:
<module 'thalesians.tsa.signatures' from 'S:\\dev\\tsa\\src\\main\\python\\thalesians\\tsa\\signatures.py'>

Basic example

This is the data from the example in [CK16], Section 2.1.1, equations (2.1)-(2.3):


In [3]:
# Two-dimensional path from [CK16], Section 2.1.1:
# one row per observation, one column per coordinate.
df = pd.DataFrame(
    np.array([
        [1., 1.],
        [3., 4.],
        [5., 2.],
        [8., 6.],
    ]),
    columns=('A', 'B'),
)

In [4]:
# Display the four path points as a table.
df


Out[4]:
A B
0 1.0 1.0
1 3.0 4.0
2 5.0 2.0
3 8.0 6.0

This reproduces equation (2.11) in [CK16]:


In [5]:
# Compute the signature of the two-column path, leaving every optional
# argument of signature() at its default.
signatures.signature(df)


Out[5]:
{(1,): 7.0, (1, 1): 24.5, (1, 2): 19.0, (2,): 5.0, (2, 1): 16.0, (2, 2): 12.5}

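(An even simpler) variation on the basic example

For a one-dimensional path, the level-$k$ signature term depends only on the total increment $\Delta = X_{\text{last}} - X_{\text{first}}$ and equals $\Delta^k / k!$. Both paths below start at 1 and end at 10 ($\Delta = 9$), so they produce the same signature up to floating-point rounding, even though the second path is not monotone.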


In [6]:
# One-dimensional path that rises from 1 to 10 (a single column 'A').
df = pd.DataFrame(
    np.array([1., 3., 5., 9., 10.]).reshape(-1, 1),
    columns=['A'],
)

In [7]:
# Request the signature terms for words of up to five letters.
signatures.signature(df, max_word_length=5)


Out[7]:
{(1,): 9.0,
 (1, 1): 40.5,
 (1, 1, 1): 121.49999999999999,
 (1, 1, 1, 1): 273.375,
 (1, 1, 1, 1, 1): 492.07499999999999}

In [8]:
# One-dimensional path that starts at 1 and ends at 10 but moves up and
# down in between (a single column 'A').
df = pd.DataFrame(
    np.array([1., 3., 2., -9., 10.]).reshape(-1, 1),
    columns=['A'],
)

In [9]:
# Request the signature terms for words of up to five letters.
signatures.signature(df, max_word_length=5)


Out[9]:
{(1,): 9.0,
 (1, 1): 40.5,
 (1, 1, 1): 121.49999999999987,
 (1, 1, 1, 1): 273.37500000000074,
 (1, 1, 1, 1, 1): 492.07500000000215}

Slightly more involved


In [10]:
# Eight-column table with 25 dated rows (2016-01-04 through 2016-02-08).
# NOTE(review): the column labels look like equity tickers and the values like
# daily prices, but the notebook does not state the data source -- confirm.
# Each inner list is one row; values follow the order of `columns` below.
df = pd.DataFrame([
    [63.62, 41.48, 177.14, 102.22, 105.35, 759.44, 33.31, 13.97],
    [63.73, 41.68, 174.09, 102.73, 102.71, 761.53, 32.43, 13.72],
    [62.81, 41.05, 169.84, 102.97, 100.70, 759.33, 31.27, 13.11],
    [60.27, 40.27, 164.62,  97.92,  96.45, 741.00, 29.99, 12.70],
    [58.92, 39.70, 163.94,  97.33,  96.96, 730.91, 29.53, 12.54],
    [58.83, 39.86, 165.73,  97.51,  98.53, 733.07, 30.25, 12.77],
    [58.96, 40.62, 165.71,  99.37,  99.96, 745.34, 30.30, 12.85],
    [57.34, 39.01, 158.99,  95.44,  97.39, 719.57, 30.49, 12.20],
    [58.20, 39.93, 161.39,  98.37,  99.52, 731.39, 30.30, 12.19],
    [57.04, 39.05, 155.61,  94.97,  97.13, 710.49, 29.57, 11.97],
    [57.01, 39.13, 156.82,  95.26,  96.66, 719.08, 29.40, 11.95],
    [55.51, 38.87, 153.75,  94.35,  96.79, 718.56, 29.42, 11.90],
    [55.25, 38.52, 151.65,  94.16,  96.30, 726.67, 29.55, 12.01],
    [56.95, 39.37, 156.86,  97.94, 101.42, 745.46, 29.28, 12.14],
    [55.66, 38.29, 151.12,  97.01,  99.44, 733.62, 29.23, 11.98],
    [57.08, 39.02, 154.45,  97.34,  99.99, 733.79, 29.65, 12.26],
    [57.04, 39.18, 153.72,  94.45,  93.42, 717.58, 29.21, 11.85],
    [57.28, 39.22, 157.06, 109.11,  94.09, 748.30, 29.02, 11.71],
    [59.50, 40.06, 161.56, 112.21,  97.34, 761.35, 29.64, 11.94],
    [58.86, 39.80, 159.65, 115.09,  96.43, 770.77, 30.11, 12.07],
    [57.03, 38.75, 151.70, 114.61,  94.48, 780.91, 29.65, 11.51],
    [57.41, 39.28, 152.68, 112.69,  96.35, 749.38, 28.92, 11.46],
    [58.40, 40.11, 156.49, 110.49,  96.60, 730.03, 28.64, 11.53],
    [57.75, 40.09, 156.47, 104.07,  94.02, 703.76, 28.54, 11.45],
    [56.54, 39.53, 149.25,  99.75,  95.01, 704.16, 28.68, 11.59]],
    # Row labels: one date per row above, in the same order.
    index=[
        dt.datetime(2016, 1, 4),
        dt.datetime(2016, 1, 5),
        dt.datetime(2016, 1, 6),
        dt.datetime(2016, 1, 7),
        dt.datetime(2016, 1, 8),
        dt.datetime(2016, 1, 11),
        dt.datetime(2016, 1, 12),
        dt.datetime(2016, 1, 13),
        dt.datetime(2016, 1, 14),
        dt.datetime(2016, 1, 15),
        dt.datetime(2016, 1, 19),
        dt.datetime(2016, 1, 20),
        dt.datetime(2016, 1, 21),
        dt.datetime(2016, 1, 22),
        dt.datetime(2016, 1, 25),
        dt.datetime(2016, 1, 26),
        dt.datetime(2016, 1, 27),
        dt.datetime(2016, 1, 28),
        dt.datetime(2016, 1, 29),
        dt.datetime(2016, 2, 1),
        dt.datetime(2016, 2, 2),
        dt.datetime(2016, 2, 3),
        dt.datetime(2016, 2, 4),
        dt.datetime(2016, 2, 5),
        dt.datetime(2016, 2, 8)],
    # Column labels, left to right.
    columns=[
        'JPM UN Equity',
        'USB UN Equity',
        'GS UN Equity',
        'FB UW Equity',
        'AAPL UW Equity',
        'GOOGL UW Equity',
        'GM UN Equity',
        'F UN Equity'])

In [11]:
# Display the full table (25 rows x 8 columns).
df


Out[11]:
JPM UN Equity USB UN Equity GS UN Equity FB UW Equity AAPL UW Equity GOOGL UW Equity GM UN Equity F UN Equity
2016-01-04 63.62 41.48 177.14 102.22 105.35 759.44 33.31 13.97
2016-01-05 63.73 41.68 174.09 102.73 102.71 761.53 32.43 13.72
2016-01-06 62.81 41.05 169.84 102.97 100.70 759.33 31.27 13.11
2016-01-07 60.27 40.27 164.62 97.92 96.45 741.00 29.99 12.70
2016-01-08 58.92 39.70 163.94 97.33 96.96 730.91 29.53 12.54
2016-01-11 58.83 39.86 165.73 97.51 98.53 733.07 30.25 12.77
2016-01-12 58.96 40.62 165.71 99.37 99.96 745.34 30.30 12.85
2016-01-13 57.34 39.01 158.99 95.44 97.39 719.57 30.49 12.20
2016-01-14 58.20 39.93 161.39 98.37 99.52 731.39 30.30 12.19
2016-01-15 57.04 39.05 155.61 94.97 97.13 710.49 29.57 11.97
2016-01-19 57.01 39.13 156.82 95.26 96.66 719.08 29.40 11.95
2016-01-20 55.51 38.87 153.75 94.35 96.79 718.56 29.42 11.90
2016-01-21 55.25 38.52 151.65 94.16 96.30 726.67 29.55 12.01
2016-01-22 56.95 39.37 156.86 97.94 101.42 745.46 29.28 12.14
2016-01-25 55.66 38.29 151.12 97.01 99.44 733.62 29.23 11.98
2016-01-26 57.08 39.02 154.45 97.34 99.99 733.79 29.65 12.26
2016-01-27 57.04 39.18 153.72 94.45 93.42 717.58 29.21 11.85
2016-01-28 57.28 39.22 157.06 109.11 94.09 748.30 29.02 11.71
2016-01-29 59.50 40.06 161.56 112.21 97.34 761.35 29.64 11.94
2016-02-01 58.86 39.80 159.65 115.09 96.43 770.77 30.11 12.07
2016-02-02 57.03 38.75 151.70 114.61 94.48 780.91 29.65 11.51
2016-02-03 57.41 39.28 152.68 112.69 96.35 749.38 28.92 11.46
2016-02-04 58.40 40.11 156.49 110.49 96.60 730.03 28.64 11.53
2016-02-05 57.75 40.09 156.47 104.07 94.02 703.76 28.54 11.45
2016-02-08 56.54 39.53 149.25 99.75 95.01 704.16 28.68 11.59

In [12]:
# Signature terms for every word of length 1 and 2 over the eight columns
# (8 + 64 = 72 entries in the displayed result).
signatures.signature(df, max_word_length=2)


Out[12]:
{(1,): -7.0799999999999983,
 (1, 1): 25.063199999999984,
 (1, 2): 6.0017499999999737,
 (1, 3): 106.58764999999991,
 (1, 4): -3.3171999999999926,
 (1, 5): 18.630049999999962,
 (1, 6): 228.75960000000023,
 (1, 7): 12.240049999999982,
 (1, 8): 7.3664499999999871,
 (2,): -1.9499999999999957,
 (2, 1): 7.8042499999999917,
 (2, 2): 1.9012499999999917,
 (2, 3): 32.697049999999983,
 (2, 4): -7.0254000000000296,
 (2, 5): 6.5714999999999826,
 (2, 6): 69.278749999999874,
 (2, 7): 4.4688999999999863,
 (2, 8): 2.6193999999999957,
 (3,): -27.889999999999986,
 (3, 1): 90.873549999999923,
 (3, 2): 21.688449999999875,
 (3, 3): 388.92604999999969,
 (3, 4): 39.679849999999973,
 (3, 5): 107.85864999999977,
 (3, 6): 1052.2296999999999,
 (3, 7): 56.102949999999929,
 (3, 8): 28.720899999999958,
 (4,): -2.4699999999999989,
 (4, 1): 20.804799999999979,
 (4, 2): 11.841900000000017,
 (4, 3): 29.208449999999942,
 (4, 4): 3.0504499999999974,
 (4, 5): 29.961249999999836,
 (4, 6): -223.68689999999989,
 (4, 7): -0.11485000000001663,
 (4, 8): 0.83894999999997721,
 (5,): -10.339999999999989,
 (5, 1): 54.577149999999932,
 (5, 2): 13.591499999999954,
 (5, 3): 180.52394999999979,
 (5, 4): -4.421449999999882,
 (5, 5): 53.457799999999892,
 (5, 6): 380.99625000000077,
 (5, 7): 27.010449999999974,
 (5, 8): 14.981799999999987,
 (6,): -55.280000000000086,
 (6, 1): 162.62280000000035,
 (6, 2): 38.517250000000068,
 (6, 3): 489.52950000000146,
 (6, 4): 360.22850000000022,
 (6, 5): 190.59894999999949,
 (6, 6): 1527.9392000000046,
 (6, 7): 38.192600000000233,
 (6, 8): 19.059950000000054,
 (7,): -4.6300000000000026,
 (7, 1): 20.540350000000025,
 (7, 2): 4.5595999999999988,
 (7, 3): 73.027750000000097,
 (7, 4): 11.550950000000023,
 (7, 5): 20.86375,
 (7, 6): 217.75380000000027,
 (7, 7): 10.718450000000011,
 (7, 8): 5.5547000000000066,
 (8,): -2.3800000000000008,
 (8, 1): 9.4839500000000125,
 (8, 2): 2.0215999999999958,
 (8, 3): 37.657300000000035,
 (8, 4): 5.0396500000000195,
 (8, 5): 9.6273999999999962,
 (8, 6): 112.5064500000002,
 (8, 7): 5.4647000000000023,
 (8, 8): 2.8322000000000016}

In [13]:
# Restrict the path to its first two columns; the displayed terms coincide
# with the entries for words over letters 1 and 2 in the eight-column result.
signatures.signature(df[['JPM UN Equity','USB UN Equity']], max_word_length=2)


Out[13]:
{(1,): -7.0799999999999983,
 (1, 1): 25.063199999999984,
 (1, 2): 6.0017499999999737,
 (2,): -1.9499999999999957,
 (2, 1): 7.8042499999999917,
 (2, 2): 1.9012499999999917}

In [14]:
# Single-column path with max_word_length left at its default: the displayed
# result holds only the level-1 term, i.e. last value minus first value.
# NOTE(review): the default word length appears to follow the number of
# columns -- confirm against signatures.signature.
signatures.signature(df[['JPM UN Equity']])


Out[14]:
{(1,): -7.0799999999999983}

Bibliography

[CK16] Ilya Chevyrev and Andrey Kormilitzin. A Primer on the Signature Method in Machine Learning. arXiv:1603.03788v1 [stat.ML] 11 Mar 2016, https://arxiv.org/pdf/1603.03788.pdf