In this sheet we look a bit more in-depth at some simple strategies, using pyfolio to generate tear sheets that help us understand their characteristics.


In [1]:
# imports
import pandas as pd
import numpy as np
from scipy import stats

import sklearn
from sklearn import preprocessing as pp

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import interactive
interactive(True)

import sys
import tensorflow as tf
import time
import os
import collections
import os.path
import pickle
import logging as log
log.basicConfig(level=log.DEBUG)

import quandl
import pyfolio as pf

In [2]:
f = 'U.pkl'
P = pickle.load(open(f))
log.info('loaded <%s>',f)
P.describe()


INFO:root:loaded <U.pkl>
Out[2]:
Multiplier Open High Low Close Volume ADV DeltaV Return Fwd_Close Fwd_Return SD Fwd_Open Fwd_COReturn
count 2725932.0 2.725932e+06 2.725932e+06 2.725932e+06 2.725932e+06 2.725932e+06 2.725931e+06 2.725931e+06 2.725931e+06 2.725932e+06 2.725932e+06 2.725929e+06 2.725932e+06 2.725932e+06
mean 1.0 2.226893e+02 2.243057e+02 2.208029e+02 2.226456e+02 4.025552e+06 4.023279e+06 2.661003e-04 2.940238e-04 2.227142e+02 2.940263e-04 2.206396e-02 2.227574e+02 3.153228e-04
std 0.0 5.015723e+03 5.042574e+03 4.979409e+03 5.012831e+03 1.212138e+07 1.086728e+07 4.786192e-01 2.754559e-02 5.014403e+03 2.754558e-02 1.668339e-02 5.017295e+03 1.402322e-02
min 1.0 1.455814e-02 7.864865e-02 1.455814e-02 5.898649e-02 4.300000e+01 1.000000e+02 -9.486203e+00 -3.570812e+00 5.898649e-02 -3.570812e+00 0.000000e+00 1.455814e-02 -9.997442e-01
25% 1.0 1.536626e+01 1.560987e+01 1.511146e+01 1.536896e+01 6.116000e+05 6.956722e+05 -2.777810e-01 -1.029169e-02 1.537259e+01 -1.029169e-02 1.215633e-02 1.537000e+01 -3.631412e-03
50% 1.0 2.655789e+01 2.693000e+01 2.617679e+01 2.656049e+01 1.462800e+06 1.580758e+06 -1.145079e-02 2.200946e-04 2.656879e+01 2.200946e-04 1.756784e-02 2.656501e+01 0.000000e+00
75% 1.0 4.509414e+01 4.563840e+01 4.454000e+01 4.510266e+01 3.598300e+06 3.713368e+06 2.617968e-01 1.102714e-02 4.511558e+01 1.102714e-02 2.636890e-02 4.510598e+01 4.241282e-03
max 1.0 2.281800e+05 2.293740e+05 2.275300e+05 2.293000e+05 2.304019e+09 6.079510e+08 1.032482e+01 1.449269e+00 2.293000e+05 1.449269e+00 8.122759e-01 2.281800e+05 3.078370e+00

In [3]:
# let's skip the dotcom period 
U = P[P.index >= '2005-01-01']
U.describe()


Out[3]:
Multiplier Open High Low Close Volume ADV DeltaV Return Fwd_Close Fwd_Return SD Fwd_Open Fwd_COReturn
count 1917012.0 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06 1.917012e+06
mean 1.0 2.591351e+02 2.609176e+02 2.569808e+02 2.590585e+02 4.508978e+06 4.505266e+06 3.556294e-04 2.319738e-04 2.591378e+02 2.378314e-04 2.032868e-02 2.592140e+02 2.992531e-04
std 0.0 5.682414e+03 5.712009e+03 5.641011e+03 5.678604e+03 1.328341e+07 1.195089e+07 4.317016e-01 2.563653e-02 5.680366e+03 2.563016e-02 1.570851e-02 5.684177e+03 1.331522e-02
min 1.0 7.864865e-02 7.864865e-02 4.915541e-02 5.898649e-02 4.300000e+01 1.685500e+02 -9.486203e+00 -3.273741e+00 5.898649e-02 -3.273741e+00 0.000000e+00 7.864865e-02 -9.599594e-01
25% 1.0 1.957748e+01 1.987066e+01 1.928268e+01 1.958098e+01 7.638000e+05 8.509638e+05 -2.612910e-01 -9.522656e-03 1.958591e+01 -9.513844e-03 1.145206e-02 1.958000e+01 -3.644035e-03
50% 1.0 3.248337e+01 3.288977e+01 3.206036e+01 3.248649e+01 1.722242e+06 1.848915e+06 -1.079121e-02 3.295436e-04 3.249498e+01 3.345321e-04 1.620110e-02 3.249060e+01 0.000000e+00
75% 1.0 5.280824e+01 5.339683e+01 5.221000e+01 5.281635e+01 4.046400e+06 4.149695e+06 2.466611e-01 1.030708e-02 5.283295e+01 1.031024e-02 2.387542e-02 5.282686e+01 4.275697e-03
max 1.0 2.281800e+05 2.293740e+05 2.275300e+05 2.293000e+05 2.304019e+09 6.079510e+08 1.032482e+01 1.356115e+00 2.293000e+05 1.356115e+00 7.328309e-01 2.281800e+05 3.078370e+00

In [4]:
import sim
_,B = sim.sim(U)
#plot NAV
B.NAV.plot(title='Equal Weight Everyone')


INFO:root:ran over 2986 days and 1917012 rows in 108 secs
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd2a7eb4a50>

In [5]:
pd.__version__


Out[5]:
u'0.19.1'

In [6]:
# pyfolio wants a tz-aware index, so localize to UTC before building the tear sheet
B.index = B.index.tz_localize('UTC')
pf.create_full_tear_sheet(B.NET_Return)


Entire data start date: 2005-01-03
Entire data end date: 2016-11-04


Backtest Months: 142
Performance statistics Backtest
cum_returns_final 1.68
annual_return 0.09
annual_volatility 0.18
sharpe_ratio 0.55
calmar_ratio 0.18
stability_of_timeseries 0.90
max_drawdown -0.47
omega_ratio 1.14
sortino_ratio 0.81
skew 0.50
kurtosis 24.63
tail_ratio 0.97
common_sense_ratio 1.05
information_ratio 0.01
alpha 0.03
beta 0.84
Worst Drawdown Periods net drawdown in % peak date valley date recovery date duration
0 47.27 2007-10-09 2008-11-20 2009-08-04 476
1 13.25 2011-04-29 2011-10-03 2012-01-25 194
2 10.35 2010-04-23 2010-07-06 2010-11-02 138
3 8.71 2006-05-08 2006-06-13 2006-10-12 114
4 8.45 2015-05-18 2016-02-11 2016-06-07 277

[-0.022 -0.046]
/usr/lib64/python2.7/site-packages/matplotlib/scale.py:101: RuntimeWarning: invalid value encountered in less_equal
  a[a <= 0.0] = 1e-300
Stress Events mean min max
Lehmann -0.08% -6.09% 3.96%
US downgrade/European Debt Crisis -0.03% -4.35% 3.15%
Fukushima 0.09% -0.76% 0.87%
EZB IR Event -0.02% -0.72% 0.66%
Aug07 0.06% -2.09% 1.71%
Mar08 0.15% -1.97% 3.26%
Sept08 -0.28% -6.09% 3.96%
2009Q1 -0.40% -6.34% 6.18%
2009Q2 0.67% -6.51% 10.22%
Flash Crash -0.16% -1.92% 3.08%
Apr14 0.00% -0.84% 0.43%
Oct14 0.06% -0.95% 0.86%
Fall2015 -0.11% -1.39% 1.20%
Low Volatility Bull Market 0.06% -2.40% 2.51%
GFC Crash -0.03% -10.54% 11.28%
Recovery 0.08% -4.79% 5.42%
New Normal 0.02% -1.56% 1.26%

In [7]:
#quandl_auth = os.environ.get('QUANDL_AUTH')  # supply your own token via the environment
#SPY = quandl.get("GOOG/NYSE_SPY", authtoken=quandl_auth)
#SPY.Close.plot()

_,Bw = sim.sim(U, sim_FUN=sim.worst_strat)
Bw.NAV.plot(title="Buy yesterday's 10 worst")


INFO:root:ran over 2986 days and 1917012 rows in 113 secs
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd29cc12a10>

In [8]:
Bw.index=Bw.index.tz_localize('UTC')
pf.create_full_tear_sheet(Bw.NET_Return)


Entire data start date: 2005-01-03
Entire data end date: 2016-11-04


Backtest Months: 142
Performance statistics Backtest
cum_returns_final 3.08
annual_return 0.13
annual_volatility 0.26
sharpe_ratio 0.58
calmar_ratio 0.24
stability_of_timeseries 0.80
max_drawdown -0.53
omega_ratio 1.16
sortino_ratio 0.92
skew 2.31
kurtosis 47.08
tail_ratio 0.98
common_sense_ratio 1.11
information_ratio 0.02
alpha 0.07
beta 0.95
Worst Drawdown Periods net drawdown in % peak date valley date recovery date duration
0 52.62 2007-06-04 2008-11-20 2009-01-02 415
1 30.84 2009-01-08 2009-03-06 2009-03-23 53
2 19.09 2005-02-02 2005-04-15 2005-05-31 85
3 15.90 2014-11-24 2016-01-19 2016-06-06 401
4 14.88 2011-05-02 2011-10-03 2014-02-13 729

[-0.032 -0.069]
Stress Events mean min max
Lehmann -0.19% -10.39% 8.86%
US downgrade/European Debt Crisis 0.01% -3.72% 4.66%
Fukushima 0.04% -0.86% 0.76%
EZB IR Event -0.02% -0.94% 0.74%
Aug07 -0.14% -3.25% 2.33%
Mar08 0.16% -3.45% 7.80%
Sept08 -0.57% -10.39% 8.86%
2009Q1 -0.21% -7.97% 7.12%
2009Q2 0.80% -7.72% 12.51%
Flash Crash 0.02% -1.59% 2.55%
Apr14 0.03% -1.08% 1.06%
Oct14 0.06% -1.47% 1.07%
Fall2015 -0.12% -1.60% 2.59%
Low Volatility Bull Market 0.09% -4.64% 3.79%
GFC Crash 0.10% -15.02% 26.43%
Recovery 0.07% -4.07% 9.36%
New Normal 0.02% -2.58% 3.63%

In [9]:
# let's see if we can give the trade more names to 
#   add capacity and reduce vol a bit

kvargs = {'num_names':20}

_,Bw20 = sim.sim(U, sim_FUN=sim.worst_strat, kvargs=kvargs)
Bw20.NAV.plot(title="Buy yesterday's 20 worst")


INFO:root:ran over 2986 days and 1917012 rows in 114 secs
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd2c7aa4390>

In [10]:
Bw20.index=Bw20.index.tz_localize('UTC')
pf.create_returns_tear_sheet(Bw20.NET_Return)


Entire data start date: 2005-01-03
Entire data end date: 2016-11-04


Backtest Months: 142
Performance statistics Backtest
cum_returns_final 1.54
annual_return 0.08
annual_volatility 0.15
sharpe_ratio 0.59
calmar_ratio 0.24
stability_of_timeseries 0.81
max_drawdown -0.34
omega_ratio 1.14
sortino_ratio 0.91
skew 1.53
kurtosis 30.73
tail_ratio 1.00
common_sense_ratio 1.08
information_ratio 0.00
alpha 0.04
beta 0.58
Worst Drawdown Periods net drawdown in % peak date valley date recovery date duration
0 34.39 2007-06-04 2008-11-20 2009-01-02 415
1 20.25 2009-01-08 2009-03-06 2009-03-23 53
2 12.67 2014-11-24 2016-01-19 2016-06-06 401
3 11.70 2011-05-02 2011-10-03 2014-02-13 729
4 9.63 2005-02-02 2005-04-15 2005-05-31 85

[-0.019 -0.04 ]

In [11]:
pf.plot_annual_returns(Bw20.NET_Return)
#.create_capacity_tear_sheet(Bw20.NET_Return)


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd2c8586bd0>

In [12]:
# let's bring the 'best' into this:
kvargs = {'num_names':20}

_,Bb20 = sim.sim(U, sim_FUN=sim.best_strat, kvargs=kvargs)
Bb20.NAV.plot(title="Buy yesterday's 20 best")
Bb20.index=Bb20.index.tz_localize('UTC')
pf.create_full_tear_sheet(Bb20.NET_Return)


INFO:root:ran over 2986 days and 1917012 rows in 115 secs
Entire data start date: 2005-01-03
Entire data end date: 2016-11-04


Backtest Months: 142
Performance statistics Backtest
cum_returns_final 1.17
annual_return 0.07
annual_volatility 0.16
sharpe_ratio 0.48
calmar_ratio 0.16
stability_of_timeseries 0.75
max_drawdown -0.41
omega_ratio 1.12
sortino_ratio 0.75
skew 1.95
kurtosis 41.80
tail_ratio 1.12
common_sense_ratio 1.20
information_ratio -0.00
alpha 0.03
beta 0.56
Worst Drawdown Periods net drawdown in % peak date valley date recovery date duration
0 41.25 2008-09-19 2009-03-06 2009-04-03 141
1 17.00 2008-02-01 2008-07-15 2008-09-08 157
2 15.93 2013-10-22 2016-01-19 2016-03-07 620
3 10.93 2010-04-23 2011-08-10 2013-05-03 791
4 8.28 2005-01-03 2005-04-28 2005-07-08 135

[-0.02 -0.05]
Stress Events mean min max
Lehmann 0.14% -4.62% 5.01%
US downgrade/European Debt Crisis -0.01% -2.42% 2.16%
Fukushima 0.00% -0.44% 0.69%
EZB IR Event -0.03% -0.77% 0.80%
Aug07 0.25% -1.89% 2.58%
Mar08 -0.15% -2.22% 2.58%
Sept08 -0.14% -4.62% 5.01%
2009Q1 -0.26% -7.88% 9.61%
2009Q2 1.06% -6.78% 15.36%
Flash Crash -0.12% -1.11% 1.43%
Apr14 -0.07% -1.22% 0.49%
Oct14 0.00% -0.98% 0.67%
Fall2015 -0.04% -1.49% 1.10%
Low Volatility Bull Market 0.02% -1.81% 2.93%
GFC Crash 0.04% -7.88% 15.36%
Recovery 0.06% -4.65% 5.94%
New Normal 0.01% -4.89% 3.58%

Ok, so we've seen three basic strategies:

  1. Equal-Weight the universe & rebalance daily.
  2. Buy yesterday's worst.
  3. Buy yesterday's best.

None is especially good: the best Sharpe among them is roughly 0.6, volatility runs from 15% to 26%, annual returns sit around 7-13%, and all have suffered wrenching drawdowns.

However, they have all done broadly better than the S&P in most environments, with the unhappy exception of the so-called 'new normal'.
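pyfolio can make that comparison explicit if we hand it a benchmark return series. A minimal sketch, reusing the Quandl SPY pull commented out in In [7] above (assuming that dataset and a valid token are still available, and that create_full_tear_sheet accepts benchmark_rets in this pyfolio version):

SPY = quandl.get("GOOG/NYSE_SPY", authtoken=os.environ.get('QUANDL_AUTH'))
spy_rets = SPY.Close.pct_change().dropna()
spy_rets.index = spy_rets.index.tz_localize('UTC')   # match the tz-aware index of our balances
pf.create_full_tear_sheet(B.NET_Return, benchmark_rets=spy_rets)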

Let's see if we can improve them a bit as-is, and then see whether any of our machine-learning lessons can improve them a bit more.

Back in 1990, Andrew Lo and Craig MacKinlay described a simple 'contrarian' strategy that combines 1, 2 and 3 into a stat-arb strategy. The gist is simple: buy the prior period's losers and sell the prior period's winners, in proportion to their under- or over-performance relative to the equal-weighted market.

Let's take that strategy as our candidate for improvement with ML techniques.
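Before writing the strategy function, here is a toy illustration (made-up returns) of the weighting rule, mirroring the renormalization used in the code below: the underperformer ends up with a positive (long) weight, the outperformer with a negative (short) weight, and each side carries 100% of notional.

r = pd.Series({'A': -0.02, 'B': 0.00, 'C': 0.02})      # yesterday's returns
raw = (r - r.mean()) / -len(r)                          # losers > 0, winners < 0
w = 2 * np.sign(raw) * (raw.abs() / raw.abs().sum())   # rescale to 100% per side
print w                                # A: 1.0 (long the loser), B: 0.0, C: -1.0 (short the winner)
print w[w > 0].sum(), w[w < 0].sum()   # 1.0 -1.0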


In [13]:
def eq_wt( U, cfg, kvargs ) :
    # simple, default strategy: equal weight universe on daily basis
    U.Weight = 1/float(len(U.index))
    return U

def bestworst( U,  cfg, kvargs ) :
    # Buy the prior period's losers and sell the prior period's winners in 
    #  proportion to their over- or under-performance of the equal-weighted market.
    N = len(U.index)
    mktR = U.Return.mean()
    Weight = np.add( U.Return, -mktR ) / (-N)
    # now let's ensure that we spend 100% on each side
    U.Weight = 2 * np.sign(Weight) * (abs(Weight) / sum(abs(Weight)))
    return U
    
#aday = U[U.index==U.index.unique()[3]]
#Weight = np.add(aday.Return, -aday.Return.mean())/(-len(aday.index))
#Ws = 2 * np.sign(Weight) * (abs(Weight) / sum(abs(Weight)))

#sum(abs(Ws))
#Ws

In [14]:
# let's run it
Sbw,Bbw = sim.sim(U, sim_FUN=bestworst)
Bbw.NAV.plot(title="LoMacKinlay")
Bbw.index=Bbw.index.tz_localize('UTC')
pf.create_full_tear_sheet(Bbw.NET_Return)


INFO:root:ran over 2986 days and 1917012 rows in 113 secs
Entire data start date: 2005-01-03
Entire data end date: 2016-11-04


Backtest Months: 142
Performance statistics Backtest
cum_returns_final 1.61
annual_return 0.08
annual_volatility 0.11
sharpe_ratio 0.82
calmar_ratio 0.57
stability_of_timeseries 0.87
max_drawdown -0.15
omega_ratio 1.19
sortino_ratio 1.27
skew 0.83
kurtosis 14.87
tail_ratio 1.05
common_sense_ratio 1.14
information_ratio 0.00
alpha 0.08
beta 0.09
Worst Drawdown Periods net drawdown in % peak date valley date recovery date duration
0 14.82 2007-07-16 2008-01-23 2008-03-13 174
1 8.92 2008-05-02 2008-07-17 2008-07-31 65
2 8.83 2009-02-12 2009-03-18 2009-04-29 55
3 8.30 2013-02-26 2013-06-24 2014-08-18 385
4 7.92 2008-08-14 2008-09-17 2008-09-30 34

[-0.013 -0.026]
Stress Events mean min max
Lehmann 0.04% -2.18% 5.53%
US downgrade/European Debt Crisis -0.11% -1.51% 2.46%
Fukushima 0.01% -0.58% 0.33%
EZB IR Event -0.02% -0.49% 0.57%
Aug07 -0.19% -2.71% 4.00%
Mar08 0.15% -1.71% 2.09%
Sept08 0.19% -2.18% 5.53%
2009Q1 0.18% -2.60% 4.97%
2009Q2 0.09% -4.01% 4.66%
Flash Crash 0.07% -0.52% 1.54%
Apr14 0.03% -0.60% 0.78%
Oct14 0.04% -0.92% 0.87%
Fall2015 -0.08% -0.91% 0.96%
Low Volatility Bull Market 0.04% -2.27% 2.02%
GFC Crash 0.09% -5.53% 6.04%
Recovery 0.02% -4.01% 4.55%
New Normal 0.02% -3.03% 4.04%

there's a reason people like stat arb strats

This is a much better strategy than the others: a Sharpe above 0.8, much shallower drawdowns, and decent capacity. But as a cousin of those strategies it doesn't escape the family influence: it beats the market most of the time, yet not in the recently relevant 'recovery' and 'new normal' phases.

So, let's see whether we can improve this strategy using the predictive model we built earlier with TensorFlow's random forest implementation.

How should we attempt to take advantage of the slim edge provided by our nascent ML efforts?

Well, the classifier tells us when it thinks a name is headed for a move down, a small move in either direction, or a move up. When it predicts a move down, the name mostly stays flat or falls.

The possibilities are:

  • Good: We're buying something predicted to go up
  • Good: We're selling something predicted to go down
  • OK : We're buying something predicted to stay flat
  • OK : We're selling something predicted to stay flat
  • Bad : We're buying something predicted to go down
  • Bad : We're selling something predicted to go up

So, let's leave the OKs alone, deallocate some fraction - say, 50% - of the weight from the Bads, and reallocate it to the Goods.
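The arithmetic on made-up weights (the real per-name bookkeeping lives in bestworst_ML further down): a Bad long gives up half its weight, a Good long picks it up, and the side's total exposure is unchanged.

w_bad, w_good, realloc = 0.30, 0.30, 0.5
freed = w_bad * realloc               # 0.15 comes off the name predicted to fall
w_bad = w_bad * (1 - realloc)         # the Bad keeps 0.15
w_good = w_good + freed               # the Good now carries 0.45
print w_bad, w_good, w_bad + w_good   # 0.15 0.45 0.6 -- long exposure unchanged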

If we wanted to trade this we would also adjust for liquidity, but since we're just playing we'll keep it simple.

Let's pull in our ML-prepped data from before, train our model, and integrate the results into our strategy.


In [15]:
fname = 'forsims.pkl'
Dataset = collections.namedtuple('Dataset', ['data', 'target'])
forsims = pickle.load(open(fname))
log.info('read %s', fname)

src_train = forsims['src_train']
src_vlad = forsims['src_vlad']
Kvlad = forsims['Kvlad']
forsims = None
Kvlad.head()


INFO:root:read forsims.pkl
Out[15]:
Date Sym Open High Low Close Volume ADV DeltaV Return SD DOY DOW FSYM VARATIO Label
0 2013-01-02 A 0.403243 0.351942 0.385458 0.381859 1.979379 0.524618 1.490612 0.786138 0.515381 2 2 0 3.772991 1
1 2013-01-03 A 0.387450 0.349947 0.415979 0.392146 0.700699 0.716255 -1.012125 0.126804 0.500819 3 3 0 0.978280 2
2 2013-01-04 A 0.401183 0.403806 0.435841 0.449066 0.987334 0.819545 0.267360 0.677702 0.500626 4 4 0 1.204734 1
3 2013-01-07 A 0.432769 0.393832 0.472742 0.427806 -0.209274 0.855693 -1.392324 -0.246701 0.487349 7 0 0 0.244567 0
4 2013-01-08 A 0.423843 0.392503 0.452873 0.404490 -0.079900 0.802002 0.196373 -0.273024 0.491539 8 1 0 0.099626 2

redefine the training method


In [16]:
print Kvlad.shape
print src_vlad.data.shape
print src_train.data.shape


def _fitntestRandomForest( train, vlad, max_nodes=1024, steps=100, model_dir='/tmp/rf') :
    # build, fit & test a random forest for the input
    # note: model_dir is accepted but never passed to the estimator, so TF falls
    #   back to a temporary folder (hence the warnings in the output below)
    fsize = len(train.data.columns)
    nclasses = len(train.target.unique())

    # num_trees is set to the number of classes here, i.e. just 3 trees
    hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
        num_trees=nclasses, max_nodes=max_nodes, num_classes=nclasses, num_features=fsize)
    classifier = tf.contrib.learn.TensorForestEstimator(hparams)
    
    tdata = train.data.as_matrix().astype(np.float32)
    ttgt = train.target.as_matrix().astype(np.float32)
    vdata = vlad.data.as_matrix().astype(np.float32)
    vtgt = vlad.target.as_matrix().astype(np.float32)

    monitors = [tf.contrib.learn.TensorForestLossMonitor(10, 10)]
    classifier.fit(x=tdata, y=ttgt, steps=steps, monitors=monitors)
    result = classifier.evaluate(x=vdata, y=vtgt)#, steps=np.round(steps/10)

    print('Accuracy: {0:f}'.format(result["accuracy"]))
    return result,classifier


(623382, 16)
(623382, 8)
(2102547, 8)

In [17]:
# let's train our model
src_rf   = _fitntestRandomForest(train=src_train, vlad=src_vlad, model_dir='/tmp/src_rf',steps=100)


WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpt5rOHF
WARNING:tensorflow:Using default config.
INFO:tensorflow:Using config: {'task': 0, 'save_summary_steps': 100, 'keep_checkpoint_max': 5, '_is_chief': True, 'save_checkpoints_secs': 600, 'evaluation_master': '', 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, 'master': '', 'keep_checkpoint_every_n_hours': 10000, '_job_name': None, 'cluster_spec': None, 'tf_random_seed': None, 'num_ps_replicas': 0}
INFO:tensorflow:Setting feature info to TensorSignature(dtype=tf.float32, shape=TensorShape([Dimension(None), Dimension(8)]), is_sparse=False)
INFO:tensorflow:Setting targets info to TensorSignature(dtype=tf.float32, shape=TensorShape([Dimension(None)]), is_sparse=False)
INFO:tensorflow:Constructing forest with params = 
INFO:tensorflow:{'valid_leaf_threshold': 1, 'split_after_samples': 250, 'num_output_columns': 4, 'feature_bagging_fraction': 1.0, 'split_initializations_per_input': 1, 'bagged_features': None, 'min_split_samples': 5, 'max_nodes': 1024, 'num_features': 8, 'num_trees': 3, 'num_splits_to_consider': 10, 'base_random_seed': 0, 'num_outputs': 1, 'max_fertile_nodes': 512, 'bagged_num_features': 8, 'bagging_fraction': 1.0, 'regression': False, 'num_classes': 3}
INFO:tensorflow:data path: /usr/lib/python2.7/site-packages/tensorflow/contrib/tensor_forest/python/ops/_training_ops.so
INFO:tensorflow:data path: /usr/lib/python2.7/site-packages/tensorflow/contrib/tensor_forest/python/ops/_inference_ops.so
INFO:tensorflow:Create CheckpointSaverHook
INFO:tensorflow:loss = -0.0, step = 1
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpt5rOHF/model.ckpt.
INFO:tensorflow:Saving checkpoints for 41 into /tmp/tmpt5rOHF/model.ckpt.
INFO:tensorflow:Loss for final step: -1022.0.
WARNING:tensorflow:Given features: Tensor("input:0", shape=(?, 8), dtype=float32), required signatures: TensorSignature(dtype=tf.float32, shape=TensorShape([Dimension(None), Dimension(8)]), is_sparse=False).
WARNING:tensorflow:Given targets: Tensor("output:0", shape=(?,), dtype=float32), required signatures: TensorSignature(dtype=tf.float32, shape=TensorShape([Dimension(None)]), is_sparse=False).
INFO:tensorflow:Constructing forest with params = 
INFO:tensorflow:{'valid_leaf_threshold': 1, 'split_after_samples': 250, 'num_output_columns': 4, 'feature_bagging_fraction': 1.0, 'split_initializations_per_input': 1, 'bagged_features': None, 'min_split_samples': 5, 'max_nodes': 1024, 'num_features': 8, 'num_trees': 3, 'num_splits_to_consider': 10, 'base_random_seed': 0, 'num_outputs': 1, 'max_fertile_nodes': 512, 'bagged_num_features': 8, 'bagging_fraction': 1.0, 'regression': False, 'num_classes': 3}
INFO:tensorflow:Restored model from /tmp/tmpt5rOHF
INFO:tensorflow:Eval steps [0,inf) for training step 41.
INFO:tensorflow:Input iterator is exhausted.
INFO:tensorflow:Saving evaluation summary for 41 step: accuracy = 0.404365
Accuracy: 0.404365

In [18]:
# now let's use our trained model to fit the validation set
vdata = src_vlad.data.as_matrix().astype(np.float32)
vtgt = src_vlad.target.as_matrix().astype(np.float32)
p=src_rf[1].predict( x=vdata)

# how'd it do?
R = pd.DataFrame( {'predicted':p,'actual':vtgt})
R['dist'] = np.abs(R.actual-R.predicted)

# avg distance is meaningful.  a null predictor should get about .88, 
#  so anything below provides some edge
print R.dist.mean()
twos=R.dist[R.dist==2]
len(twos.index)/float(len(R.index))


WARNING:tensorflow:Calling predict (from tensorflow.contrib.learn.python.learn.estimators.random_forest) with as_iterable=False is deprecated and will be removed after 2016-09-15.
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
WARNING:tensorflow:Calling predict_proba (from tensorflow.contrib.learn.python.learn.estimators.random_forest) with as_iterable=False is deprecated and will be removed after 2016-09-15.
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
WARNING:tensorflow:Calling predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with as_iterable=False is deprecated and will be removed after 2016-09-15.
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
INFO:tensorflow:Constructing forest with params = 
INFO:tensorflow:{'valid_leaf_threshold': 1, 'split_after_samples': 250, 'num_output_columns': 4, 'feature_bagging_fraction': 1.0, 'split_initializations_per_input': 1, 'bagged_features': None, 'min_split_samples': 5, 'max_nodes': 1024, 'num_features': 8, 'num_trees': 3, 'num_splits_to_consider': 10, 'base_random_seed': 0, 'num_outputs': 1, 'max_fertile_nodes': 512, 'bagged_num_features': 8, 'bagging_fraction': 1.0, 'regression': False, 'num_classes': 3}
INFO:tensorflow:Loading model from checkpoint: /tmp/tmpt5rOHF/model.ckpt-41-?????-of-00001.
0.713405251503
Out[18]:
0.11777048422957352
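Two quick sanity checks, sketched under the assumption that the three classes are roughly balanced in the validation set: a row-normalized crosstab to eyeball the earlier claim that a 'down' prediction mostly precedes a flat or down move, and a short expectation showing where the ~.88 no-skill baseline for the mean distance comes from.

# how the actual labels distribute within each predicted class
print pd.crosstab(R.predicted, R.actual, normalize='index')

# if predictions were drawn independently with the same class frequencies as the
# labels, the expected |actual - predicted| is sum_ij p_i * p_j * |i - j|
p = R.actual.value_counts(normalize=True).sort_index()
print sum(p[i] * p[j] * abs(i - j) for i in p.index for j in p.index)   # ~0.89 for balanced 3-class labels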

In [19]:
# ok, let's create a df with date,symbol and prediction which we'll then join onto the simulation dataset
V = pd.DataFrame( {'Date':          Kvlad.Date,
                   'Sym':           Kvlad.Sym,
                   'MLPrediction':  R.predicted })
#Kvlad.head()
print U.shape
print V.shape
V.head()

Uv = U[U.index >= V.Date.min()].copy()   # copy so the in-place edits below don't modify a slice of U
print Uv.shape

Uv.reset_index(inplace=True)
Uv.head()
#V.set_index('Date',inplace=True)

Uml = Uv.merge( V, how='left', on=['Date','Sym'] )
Uml.sort_values(['Date','Sym'],inplace=True)

Uml.set_index('Date',inplace=True)
Uml.head()


(1917012, 19)
(623382, 3)
(623382, 19)
Out[19]:
Sym Product Instrument Multiplier Expiry Strike Open High Low Close Volume ADV DeltaV Return Fwd_Close Fwd_Return SD Fwd_Open Fwd_COReturn MLPrediction
Date
2013-01-02 A A STK 1.0 None None 40.589711 40.589711 39.607935 40.310579 6287700.0 3414135.0 0.624395 0.022701 40.454958 0.003575 0.016242 40.368330 0.001433 0
2013-01-02 AAPL AAPL STK 1.0 None None 72.715030 72.869961 71.114517 72.086117 140129500.0 151659410.0 -0.162609 0.031185 71.175689 -0.012710 0.024211 71.935125 -0.002095 0
2013-01-02 ABC ABC STK 1.0 None None 41.723424 41.867000 41.369267 41.627706 1972400.0 2635575.0 0.420586 0.007154 41.541560 -0.002072 0.008505 41.675565 0.001150 1
2013-01-02 ABT ABT STK 1.0 None None 29.690011 29.827891 29.083342 29.460212 20266400.0 14864545.0 0.063573 0.018151 30.581631 0.037359 0.008568 30.241528 0.026521 0
2013-01-02 ACAS ACAS STK 1.0 None None 12.250000 12.400000 12.240000 12.370000 5398600.0 2879480.0 0.326574 0.028702 12.580000 0.016834 0.008817 12.370000 0.000000 0

we've annotated the simulation dataset with the ML predictions.

we need to integrate this data into the actual strategy:


In [20]:
def bestworst_ML( U,  cfg, kvargs ) :
    """ Buy the prior period's losers and sell the prior period's winners in 
          proportion to their over- or under-performance of the equal-weighted market.
        Then, cross-reference ML's views with this.  The possibilities are:
         - Good: We're buying something predicted to go up
         - Good: We're selling something predicted to go down
         - OK  : We're buying something predicted to stay flat
         - OK  : We're selling something predicted to stay flat
         - Bad : We're buying something predicted to go down
         - Bad : We're selling something predicted to go up

         We leave the *OK*s alone and deallocate from the *Bad*s according to 
         value in kvargs 'realloc' and reallocate to the *Goods*.
    """    
    realloc = kvargs.get('realloc', 0.5)
    N = len(U.index)
    mktR = U.Return.mean()
    Weight = np.add( U.Return, -mktR ) / (-N)
    # now let's ensure that we spend 100% on each side
    U.Weight = 2 * np.sign(Weight) * (abs(Weight) / sum(abs(Weight)))

    # now, let's add-in our ML insights
    # we're going to deallocate from these guys
    bad1 = np.logical_and(U.MLPrediction==0, U.Weight > 0) 
    bad2 = np.logical_and(U.MLPrediction==2, U.Weight < 0)
    bads = np.logical_or(bad1, bad2) 
    lbads = np.logical_and(bads, U.Weight>0)
    sbads = np.logical_and(bads, U.Weight<0)
    
    # and reallocate to these
    good1 = np.logical_and(U.MLPrediction==0, U.Weight < 0)
    good2 = np.logical_and(U.MLPrediction==2, U.Weight > 0)
    goods = np.logical_or( good1, good2 ) 
    lgoods = np.logical_and(goods, U.Weight>0)
    sgoods = np.logical_and(goods, U.Weight<0)
    numlgoods = len(U[lgoods].index)
    numsgoods = len(U[sgoods].index)
    
    # how much weight to add to longs & shorts?
    lwt = U[lbads].Weight.sum() * realloc
    swt = U[sbads].Weight.sum() * realloc

    # let's deallocate from bads
    U.Weight = np.where( bads, U.Weight * (1-realloc),U.Weight)

    # and allocate to goods long & short
    if numlgoods > 0:
        U.Weight = np.where( lgoods, U.Weight + (lwt/numlgoods), U.Weight )
    if numsgoods > 0:
        U.Weight = np.where( sgoods, U.Weight + (swt/numsgoods), U.Weight )

    #    pdb.set_trace()

    return U

In [21]:
# first, what is our baseline over this period?  Let's look at equal-weight and bestworst and then bestworst_ML

_,Beq   = sim.sim(Uml)
_,Bbw   = sim.sim(Uml,sim_FUN=bestworst)
kvargs = {'realloc': 0.5}
_,Bbwml = sim.sim(Uml,sim_FUN=bestworst_ML,kvargs=kvargs)


INFO:root:ran over 971 days and 623382 rows in 37 secs
INFO:root:ran over 971 days and 623382 rows in 38 secs
INFO:root:ran over 971 days and 623382 rows in 49 secs

In [22]:
Beq.NAV.plot(color='black')
Bbw.NAV.plot(color='green')
Bbwml.NAV.plot(color='blue')


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd2c9905e90>

In [23]:
# let's try reallocating the whole shebang
kvargs = {'realloc': 1}
_,Bbwml1 = sim.sim(Uml,sim_FUN=bestworst_ML,kvargs=kvargs)


INFO:root:ran over 971 days and 623382 rows in 48 secs

In [24]:
Beq.NAV.plot(color='black')
Bbw.NAV.plot(color='green')
Bbwml.NAV.plot(color='blue')
Bbwml1.NAV.plot(color='red')

def pyfolio_tf ( Balances ):
    Balances.index=Balances.index.tz_localize('UTC')
    pf.create_full_tear_sheet(Balances.NET_Return)



In [25]:
print sim.sharpe(Beq.NET_Return)
print sim.sharpe(Bbw.NET_Return)
print sim.sharpe(Bbwml.NET_Return)
print sim.sharpe(Bbwml1.NET_Return)

pyfolio_tf(Bbwml1)


0.951649524258
0.606950162164
0.872650127516
1.0875584869
Entire data start date: 2013-01-02
Entire data end date: 2016-11-04


Backtest Months: 46
Performance statistics Backtest
cum_returns_final 0.62
annual_return 0.13
annual_volatility 0.12
sharpe_ratio 1.09
calmar_ratio 1.02
stability_of_timeseries 0.91
max_drawdown -0.13
omega_ratio 1.23
sortino_ratio 1.69
skew 0.65
kurtosis 7.57
tail_ratio 1.05
common_sense_ratio 1.19
information_ratio 0.00
alpha 0.11
beta 0.16
Worst Drawdown Periods net drawdown in % peak date valley date recovery date duration
0 13.07 2015-09-04 2016-01-13 2016-03-08 133
1 11.95 2013-02-26 2013-06-24 2013-12-30 220
2 8.85 2014-09-12 2014-10-16 2014-11-10 42
3 6.83 2015-06-17 2015-08-04 2015-08-10 39
4 5.62 2016-09-28 2016-10-31 NaT NaN

[-0.015 -0.031]
Stress Events mean min max
Apr14 0.05% -1.01% 1.33%
Oct14 0.03% -1.99% 1.81%
Fall2015 -0.13% -1.59% 4.25%
New Normal 0.05% -4.30% 5.44%

That seems a pretty compelling result.

The market since 2013 has been odd - the 'new normal.' Simply equal-weighting this upward-trending period yields a stout 0.95 Sharpe, while the (long/short) baseline bestworst strategy came in around 0.6. Adding the admittedly limited edge from the random forest model improves things, though by how much depends on how heavily you trust its predictions. Reallocating half of the 'Bad' weight (trusting it roughly as much as the bestworst signal) lifts the Sharpe to about 0.87; reallocating all of it, so the ML view overrides bestworst wherever they disagree, yields a 1.09 Sharpe with annual returns of 13% and volatility of 12%.

Let's consider a strategy that simply equal-weights the ML's predictions.

what might that look like?

Let's start simple and largely disregard liquidity concerns: pack equal dollars into the long and short sides, buying names predicted to go up, selling names predicted to go down, and ignoring names predicted to stay flattish. A first sketch follows the open questions below.

  • If we are holding a position and today it signals 'flat', should we just hold it?
  • should we always equal-weight, or tilt in the direction of the prediction?
  • ...
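
A first sketch of such a strategy function, following the sim_FUN convention used above; this is only a starting point under the label mapping assumed earlier (0 = down, 1 = flattish, 2 = up) and has not been run in this notebook:

def ml_eqwt( U, cfg, kvargs ) :
    # hypothetical sketch: equal dollars long everything predicted to go up,
    #   equal dollars short everything predicted to go down, and no position
    #   in names predicted to stay flat (one of the open questions above is
    #   whether 'flat' should instead mean 'hold yesterday's position')
    longs  = U.MLPrediction == 2
    shorts = U.MLPrediction == 0
    U.Weight = 0.0
    if longs.sum() > 0:
        U.Weight = np.where(longs,  1.0 / longs.sum(),  U.Weight)
    if shorts.sum() > 0:
        U.Weight = np.where(shorts, -1.0 / shorts.sum(), U.Weight)
    return U

# _,Bml = sim.sim(Uml, sim_FUN=ml_eqwt)
# Bml.NAV.plot(title="Equal-weight the ML predictions")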

In [ ]: