Use zipline to create the pandas DataFrame and Series objects to pass into alphalense.

This example uses the talib (Technical Analysis) library function Accumulate/Distribute Oscillator as a test signal. The two global data objects are created with a zipline algo using the Dow 30 stocks as the universe and then passed to alphalens. We can then determine if there is a suitable trading signal from this classic technical analysis indicator.

Visit Quantopian at https://www.quantopian.com

Learn about zipline at https://github.com/quantopian/zipline

Learn about alphalens at https://github.com/quantopian/alphalens

And for some more information visit my site at: http://www.prokopyshen.com/zipline-talib-alphalens



In [1]:

    
# Get our notebook ready for zipline 
%matplotlib inline
%load_ext zipline



In [2]:

    
import pandas as pd
import talib
from zipline.api import symbol
import alphalens



In [3]:

    
# Create global variables to feed alphalens
dfPrice=pd.DataFrame()
seSig=pd.Series()



In [4]:

    
# Zipline algo

def initialize(context):
    context.iNDays=400   # How many days of data we want
    
    context.iADOFast=5   # talib AD Osc constant
    context.iADOSlow=14  # talib AD Osc constant
    
    # DJI 30
    context.secs=[]
    context.secs.append(symbol("AAPL")) # Apple
    context.secs.append(symbol("AXP")) # American Express
    context.secs.append(symbol("BA")) # Boeing
    context.secs.append(symbol("CAT")) # Caterpillar
    context.secs.append(symbol("CSCO")) # Cisco
    context.secs.append(symbol("CVX")) # Chevron
    context.secs.append(symbol("DD")) # E I du Pont de Nemours and Co
    context.secs.append(symbol("DIS")) # Disney
    context.secs.append(symbol("GE")) # General Electric
    context.secs.append(symbol("GS")) # Goldman Sachs
    context.secs.append(symbol("HD")) # Home Depot
    context.secs.append(symbol("IBM")) # IBM
    context.secs.append(symbol("INTC")) # Intel
    context.secs.append(symbol("JNJ")) # Johnson & Johnson
    context.secs.append(symbol("JPM")) # JPMorgan Chase
    context.secs.append(symbol("KO")) # Coca-Cola
    context.secs.append(symbol("MCD")) # McDonald's
    context.secs.append(symbol("MMM")) # 3M
    context.secs.append(symbol("MRK")) # Merck
    context.secs.append(symbol("MSFT")) # Microsoft
    context.secs.append(symbol("NKE")) # Nike
    context.secs.append(symbol("PFE")) # Pfizer
    context.secs.append(symbol("PG")) # Procter & Gamble
    context.secs.append(symbol("TRV")) # Travelers Companies Inc
    context.secs.append(symbol("UNH")) # UnitedHealth
    context.secs.append(symbol("UTX")) # United Technologies
    context.secs.append(symbol("V")) # Visa
    context.secs.append(symbol("VZ")) # Verizon
    context.secs.append(symbol("WMT")) # Wal-Mart
    context.secs.append(symbol("XOM")) # Exxon Mobil

def handle_data(context, data):
    global seSig   
    liSeries=[]  # Used to collect the series as we go

    # Get data
    dfP=data.history(context.secs,'price',context.iNDays,'1d')
    dfL=data.history(context.secs,'low',context.iNDays,'1d')
    dfH=data.history(context.secs,'high',context.iNDays,'1d')
    dfV=data.history(context.secs,'volume',context.iNDays,'1d')

    ixP=dfP.index  # This is the date 

    for S in context.secs:
        # Save our history for alphalens
        dfPrice[S.symbol]=dfP[S]
        
        # Normalize for tablib
        seP=dfP[S]/dfP[S].mean()
        seL=dfL[S]/dfL[S].mean()
        seH=dfH[S]/dfH[S].mean()
        seV=dfV[S]/dfV[S].mean()
        
        # Get our ta-value
        ndADosc=talib.ADOSC( \
            seP.values,seL.values,seH.values,seV.values, \
            context.iADOFast,context.iADOSlow)

        # alphalens requires that the Series used for the Signal 
        # have a MultiIndex consisting of date+symbol

        # Build a list of symbol names same length as our price data
        liW=[S.symbol]*len(ixP)
        # Make a tuple
        tuW=zip(ixP,liW)
        # Create the required MultiIndex
        miW=pd.MultiIndex.from_tuples(tuW,names=['date','sym'])
        # Create series
        seW=pd.Series(ndADosc,index=miW)
        # Save it for later
        liSeries.append(seW)

    # Now make the required series
    seSig=pd.concat(liSeries).dropna()

    return



In [5]:

    
# We only need to run zipline for one day.... not the whole period
# now run run zipline for last day in period of interest 
%zipline --start=2016-8-31 --end=2016-8-31  --capital-base=100000









    Out[5]:






  
    
      
      algo_volatility
      algorithm_period_return
      alpha
      benchmark_period_return
      benchmark_volatility
      beta
      capital_used
      ending_cash
      ending_exposure
      ending_value
      ...
      short_exposure
      short_value
      shorts_count
      sortino
      starting_cash
      starting_exposure
      starting_value
      trading_days
      transactions
      treasury_period_return
    
  
  
    
      2016-08-31 20:00:00+00:00
      None
      0.0
      None
      -0.002376
      None
      None
      0.0
      100000.0
      0.0
      0.0
      ...
      0
      0
      0
      None
      100000.0
      0.0
      0.0
      1
      []
      0.0158
    
  

1 rows × 38 columns



In [6]:

    
# Lets take a look at what got built
print type(dfPrice),"length=",len(dfPrice)
print dfPrice.head(3)
print dfPrice.tail(3)









    



<class 'pandas.core.frame.DataFrame'> length= 400
                              AAPL     AXP       BA     CAT    CSCO      CVX  \
2015-02-02 00:00:00+00:00  114.690  80.397  139.925  76.110  25.737   99.351   
2015-02-03 00:00:00+00:00  114.709  81.963  140.958  79.010  26.015  102.601   
2015-02-04 00:00:00+00:00  115.589  81.845  140.844  77.155  25.603  101.496   

                               DD     DIS      GE       GS   ...       NKE  \
2015-02-02 00:00:00+00:00  69.017  90.686  23.237  172.155   ...    45.482   
2015-02-03 00:00:00+00:00  70.674  92.826  23.486  176.814   ...    46.190   
2015-02-04 00:00:00+00:00  71.069  99.909  23.189  175.382   ...    45.789   

                              PFE      PG      TRV      UNH      UTX       V  \
2015-02-02 00:00:00+00:00  30.302  80.991  102.197  105.907  112.184  63.044   
2015-02-03 00:00:00+00:00  30.656  81.780  103.624  106.421  114.407  64.143   
2015-02-04 00:00:00+00:00  30.975  81.628  104.298  106.529  113.348  65.402   

                              VZ     WMT     XOM  
2015-02-02 00:00:00+00:00  46.98  82.626  85.733  
2015-02-03 00:00:00+00:00  47.83  83.089  88.289  
2015-02-04 00:00:00+00:00  47.80  83.533  87.533  

[3 rows x 30 columns]
                             AAPL     AXP       BA    CAT   CSCO     CVX  \
2016-08-29 00:00:00+00:00  106.82  65.519  132.900  83.10  31.58  102.05   
2016-08-30 00:00:00+00:00  106.00  65.440  130.810  82.48  31.54  101.72   
2016-08-31 00:00:00+00:00  106.10  65.580  129.449  81.95  31.44  100.58   

                              DD    DIS     GE      GS  ...      NKE    PFE  \
2016-08-29 00:00:00+00:00  70.45  94.87  31.36  166.22  ...    58.63  35.11   
2016-08-30 00:00:00+00:00  70.24  94.87  31.37  169.37  ...    58.00  34.88   
2016-08-31 00:00:00+00:00  69.60  94.46  31.24  169.46  ...    57.64  34.80   

                              PG     TRV     UNH     UTX      V     VZ    WMT  \
2016-08-29 00:00:00+00:00  88.30  118.48  137.27  107.97  80.87  52.50  71.40   
2016-08-30 00:00:00+00:00  87.54  118.47  136.87  107.35  81.17  52.27  71.31   
2016-08-31 00:00:00+00:00  87.31  118.71  136.05  106.43  80.90  52.33  71.44   

                             XOM  
2016-08-29 00:00:00+00:00  87.84  
2016-08-30 00:00:00+00:00  87.54  
2016-08-31 00:00:00+00:00  87.14  

[3 rows x 30 columns]



In [13]:

    
print type(seSig),"length=",len(seSig)
print seSig.head(3)
print seSig.tail(3)
# Make sure out MultiIndex is date+symbol
print seSig.index[0]









    



<class 'pandas.core.series.Series'> length= 11610
date                       sym 
2015-02-20 00:00:00+00:00  AAPL   -20.694482
2015-02-23 00:00:00+00:00  AAPL   -21.610103
2015-02-24 00:00:00+00:00  AAPL   -21.178680
dtype: float64
date                       sym
2016-08-29 00:00:00+00:00  XOM    -65.675206
2016-08-30 00:00:00+00:00  XOM    -99.419735
2016-08-31 00:00:00+00:00  XOM   -115.905694
dtype: float64
(Timestamp('2015-02-20 00:00:00+0000', tz='UTC'), u'AAPL')



In [14]:

    
alphalens.tears.create_factor_tear_sheet( \
        factor=seSig, \
        prices=dfPrice,periods=(1,2,3))









    



Returns Analysis






    






  
    
      
      1
      2
      3
    
  
  
    
      Ann. alpha
      0.067
      0.067
      0.054
    
    
      beta
      0.008
      -0.010
      -0.000
    
    
      Mean Period Wise Return Top Quantile (bps)
      0.993
      0.734
      0.451
    
    
      Mean Period Wise Return Bottom Quantile (bps)
      -2.601
      -2.355
      -2.243
    
    
      Mean Period Wise Spread (bps)
      3.594
      3.088
      2.694
    
  








    



Information Analysis






    






  
    
      
      1
      2
      3
    
  
  
    
      IC Mean
      0.011
      0.010
      0.002
    
    
      IC Std.
      0.191
      0.192
      0.195
    
    
      t-stat(IC)
      1.117
      0.986
      0.190
    
    
      p-value(IC)
      0.265
      0.325
      0.849
    
    
      IC Skew
      -0.023
      0.028
      -0.019
    
    
      IC Kurtosis
      -0.200
      -0.468
      -0.374
    
    
      Ann. IR
      0.905
      0.799
      0.154
    
  








    



Turnover Analysis






    






  
    
      
      1
    
  
  
    
      Quantile 1 Mean Turnover
      0.088
    
    
      Quantile 2 Mean Turnover
      0.192
    
    
      Quantile 3 Mean Turnover
      0.211
    
    
      Quantile 4 Mean Turnover
      0.173
    
    
      Quantile 5 Mean Turnover
      0.081
    
  








    






  
    
      
      1
    
  
  
    
      Mean Factor Rank Autocorrelation
      0.968
    
  








    



/home/rproko01/pyfolio/local/lib/python2.7/site-packages/alphalens/plotting.py:700: FutureWarning: pd.rolling_apply is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,min_periods=1,window=2).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/home/rproko01/pyfolio/local/lib/python2.7/site-packages/alphalens/plotting.py:741: FutureWarning: pd.rolling_apply is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(center=False,min_periods=1,window=2).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/home/rproko01/pyfolio/local/lib/python2.7/site-packages/alphalens/plotting.py:700: FutureWarning: pd.rolling_apply is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,min_periods=1,window=3).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/home/rproko01/pyfolio/local/lib/python2.7/site-packages/alphalens/plotting.py:741: FutureWarning: pd.rolling_apply is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(center=False,min_periods=1,window=3).apply(args=<tuple>,func=<function>,kwargs=<dict>)
  min_periods=1, args=(period,))
/home/rproko01/pyfolio/local/lib/python2.7/site-packages/alphalens/plotting.py:491: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=22,center=False).mean()
  pd.rolling_mean(mean_returns_spread_bps, 22).plot(color='orangered',
/home/rproko01/pyfolio/local/lib/python2.7/site-packages/matplotlib/axes/_axes.py:2748: MatplotlibDeprecationWarning: Use of None object as fmt keyword argument to suppress plotting of data values is deprecated since 1.4; use the string "none" instead.
  warnings.warn(msg, mplDeprecation, stacklevel=1)
/home/rproko01/pyfolio/local/lib/python2.7/site-packages/alphalens/plotting.py:202: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=22,center=False).mean()
  pd.rolling_mean(ic, 22).plot(ax=a,






    





<matplotlib.figure.Figure at 0x7f3cfe4df350>



In [ ]:



In [ ]:



In [ ]:

	1	2	3
Ann. alpha	0.067	0.067	0.054
beta	0.008	-0.010	-0.000
Mean Period Wise Return Top Quantile (bps)	0.993	0.734	0.451
Mean Period Wise Return Bottom Quantile (bps)	-2.601	-2.355	-2.243
Mean Period Wise Spread (bps)	3.594	3.088	2.694

	1	2	3
IC Mean	0.011	0.010	0.002
IC Std.	0.191	0.192	0.195
t-stat(IC)	1.117	0.986	0.190
p-value(IC)	0.265	0.325	0.849
IC Skew	-0.023	0.028	-0.019
IC Kurtosis	-0.200	-0.468	-0.374
Ann. IR	0.905	0.799	0.154

	1
Quantile 1 Mean Turnover	0.088
Quantile 2 Mean Turnover	0.192
Quantile 3 Mean Turnover	0.211
Quantile 4 Mean Turnover	0.173
Quantile 5 Mean Turnover	0.081