Let's approach the problem of stock-picking asking: which stock would you pick if you know the future and you can open 1 position only (short/long) for a fixed time-window and with a fixed amount of money?
The answer is the
It would be very easy and for this reason God decided to prevent us from predicting correctly the future. Key questions are:
For the second questions, an index like sharpe ratio was born, but how good would be a strategy for stock picking based on sharpe ratio?
Let's assume as universe the top 1500 “tradeable” stocks by 200-day average dollar volume, capped at 30% of equities allocated to any single sector. A stock is considered “tradeable” if it meets the following criteria:
In [1]:
from quantopian.interactive.data.sentdex import sentiment
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
import matplotlib.pyplot as plt
import datetime
import numpy as np
import pandas as pd
In [2]:
def fill_missing_values(df_data):
"""Fill missing values in data frame, in place."""
df_data.fillna(method='ffill',inplace=True)
df_data.fillna(method='backfill',inplace=True)
return df_data
In [3]:
DELTA = 7
In [4]:
start_date = (datetime.date.today() - datetime.timedelta(DELTA)).strftime("%m-%d-%Y")
print("start_date",start_date)
In [5]:
end_date = (datetime.date.today()).strftime("%m-%d-%Y")
print("end_date",end_date)
In [6]:
def make_pipeline():
universe = (Q1500US())
pipe = Pipeline( screen = universe)
return pipe
result = run_pipeline(pipeline=make_pipeline(),start_date=start_date,end_date=end_date)
result.head()
Out[6]:
In [7]:
assets = result.index.levels[1].unique()
print("# assets:",len(assets))
In [8]:
pricing = fill_missing_values(get_pricing(assets,start_date=start_date,end_date=end_date,fields='open_price'))
In [9]:
pricing.head()
Out[9]:
In [10]:
type(pricing)
Out[10]:
In [11]:
pricing = pricing.sort_index()
ax = pricing.ix[:,0:10].plot(title="Stock Data")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
plt.show()
In [12]:
def compute_daily_returns(df):
"""Compute and return the daily return values."""
# Note: Returned DataFrame must have the same number of rows
daily_returns = (df / df.shift(1)) - 1
daily_returns.ix[0,:] = 0
return daily_returns
In [13]:
dayly_returns = compute_daily_returns(pricing)
dayly_returns = dayly_returns.sort_index()
dayly_returns.head()
Out[13]:
In [14]:
ax = dayly_returns.ix[:,0:10].plot(title="Daily returns")
ax.set_xlabel("Date")
ax.set_ylabel("Daily return")
plt.show()
Top performance by day and related symbols
In [15]:
dayly_returns.max(axis=1)
Out[15]:
In [16]:
dayly_returns.idxmax(axis=1)
Out[16]:
In [17]:
dayly_returns.ix[:,0:4]
Out[17]:
In [18]:
dayly_returns[dayly_returns.columns[0]]
Out[18]:
In [19]:
dayly_returns[dayly_returns.idxmax(axis=1)[1]]
Out[19]:
average / std
In [20]:
dayly_returns.mean(axis=1)
Out[20]:
In [21]:
dayly_returns.std(axis=1)
Out[21]:
In [22]:
def cumulative_returns(df):
return df/df.ix[0,:] - 1
In [23]:
cr = cumulative_returns(pricing)
cr = cr.sort_index()
cr.head()
Out[23]:
In [24]:
cr.max(axis=1)
Out[24]:
In [25]:
cr.idxmax(axis=1)
Out[25]:
Best performance
In [26]:
ax = cr[pd.concat([cr.idxmax(axis=1),cr.idxmin(axis=1)])].plot(title="Cumulative returns")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative return")
plt.show()
In [28]:
def sharpe_ratio(df,sample_freq='d',risk_free_rate=0.0):
sr = (df - risk_free_rate).mean() / df.std()
if sample_freq == 'd':
sr = sr * np.sqrt(252)
elif sample_freq == 'w':
sr = sr * np.sqrt(52)
elif sample_freq == 'm':
sr = sr * np.sqrt(12)
else:
raise Exception('unkown sample frequency :'+str(sample_freq))
sr = sr.replace(np.inf, np.nan)
return sr
In [32]:
sr = sharpe_ratio(compute_daily_returns(pricing))
sr.head()
Out[32]:
In [33]:
sr = sr.sort_values(ascending=False)
sr.head()
Out[33]:
In [34]:
pd.concat([sr.head(3),sr.fillna(0).tail(1)]).plot()
Out[34]:
In [35]:
DELTA = 30
In [36]:
start_date = (datetime.date.today() - datetime.timedelta(DELTA)).strftime("%m-%d-%Y")
print("start_date",start_date)
In [37]:
end_date = (datetime.date.today()).strftime("%m-%d-%Y")
print("end_date",end_date)
In [38]:
def make_pipeline():
universe = (Q1500US())
pipe = Pipeline( screen = universe)
return pipe
result = run_pipeline(pipeline=make_pipeline(),start_date=start_date,end_date=end_date)
result.head()
Out[38]:
In [39]:
assets = result.index.levels[1].unique()
print("# assets:",len(assets))
In [40]:
pricing = fill_missing_values(get_pricing(assets,start_date=start_date,end_date=end_date,fields='open_price'))
In [41]:
pricing.head()
Out[41]:
In [42]:
type(pricing)
Out[42]:
In [43]:
pricing = pricing.sort_index()
ax = pricing.ix[:,0:10].plot(title="Stock Data")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
plt.show()
In [44]:
def compute_daily_returns(df):
"""Compute and return the daily return values."""
# Note: Returned DataFrame must have the same number of rows
daily_returns = (df / df.shift(1)) - 1
daily_returns.ix[0,:] = 0
return daily_returns
In [45]:
dayly_returns = compute_daily_returns(pricing)
dayly_returns = dayly_returns.sort_index()
dayly_returns.head()
Out[45]:
In [46]:
ax = dayly_returns.ix[:,0:10].plot(title="Daily returns")
ax.set_xlabel("Date")
ax.set_ylabel("Daily return")
plt.show()
Top performance by day and related symbols
In [47]:
dayly_returns.max(axis=1)
Out[47]:
In [48]:
dayly_returns.idxmax(axis=1)
Out[48]:
average / std
In [49]:
dayly_returns.mean(axis=1)
Out[49]:
In [50]:
dayly_returns.std(axis=1)
Out[50]:
In [51]:
def cumulative_returns(df):
return df/df.ix[0,:] - 1
In [52]:
cr = cumulative_returns(pricing)
cr = cr.sort_index()
cr.head()
Out[52]:
In [53]:
cr.max(axis=1)
Out[53]:
In [54]:
cr.idxmax(axis=1)
Out[54]:
Focusing on the last day only, we want to know the ranking.
In [55]:
cr.ix[ (cr.shape[0]-1) ,:].sort_values(ascending=False)
Out[55]:
Best performance
In [56]:
pricing[cr.idxmax(axis=1)[cr.idxmax(axis=1).shape[0]-1]].plot()
Out[56]:
In [57]:
ax = cr[pd.concat([cr.idxmax(axis=1),cr.idxmin(axis=1)])].plot(title="Cumulative returns")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative return")
ax.legend_.remove()
plt.show()
In [58]:
def sharpe_ratio(df,sample_freq='d',risk_free_rate=0.0):
sr = (df - risk_free_rate).mean() / df.std()
if sample_freq == 'd':
sr = sr * np.sqrt(252)
elif sample_freq == 'w':
sr = sr * np.sqrt(52)
elif sample_freq == 'm':
sr = sr * np.sqrt(12)
else:
raise Exception('unkown sample frequency :'+str(sample_freq))
sr = sr.replace(np.inf, np.nan)
return sr
In [59]:
sr = sharpe_ratio(compute_daily_returns(pricing))
sr.fillna(0)
sr.head()
Out[59]:
In [60]:
sr = sr.sort_values(ascending=False)
sr.head(20)
Out[60]:
In [61]:
pd.concat([sr.head(3),sr.tail(1)]).plot()
Out[61]:
In [62]:
pricing[sr.index[0]].plot()
Out[62]:
In [63]:
pricing[sr.index[1]].plot()
Out[63]:
In [64]:
pricing[sr.index[2]].plot()
Out[64]:
In [65]:
DELTA = 30*6
In [66]:
start_date = (datetime.date.today() - datetime.timedelta(DELTA)).strftime("%m-%d-%Y")
print("start_date",start_date)
In [67]:
end_date = (datetime.date.today()).strftime("%m-%d-%Y")
print("end_date",end_date)
In [68]:
def make_pipeline():
universe = (Q1500US())
pipe = Pipeline( screen = universe)
return pipe
result = run_pipeline(pipeline=make_pipeline(),start_date=start_date,end_date=end_date)
result.head()
Out[68]:
In [69]:
assets = result.index.levels[1].unique()
print("# assets:",len(assets))
In [70]:
pricing = fill_missing_values(get_pricing(assets,start_date=start_date,end_date=end_date,fields='open_price'))
In [71]:
pricing.head()
Out[71]:
In [72]:
type(pricing)
Out[72]:
In [73]:
pricing = pricing.sort_index()
ax = pricing.ix[:,0:10].plot(title="Stock Data")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend_.remove()
plt.show()
In [74]:
def compute_daily_returns(df):
"""Compute and return the daily return values."""
# Note: Returned DataFrame must have the same number of rows
daily_returns = (df / df.shift(1)) - 1
daily_returns.ix[0,:] = 0
return daily_returns
In [75]:
dayly_returns = compute_daily_returns(pricing)
dayly_returns = dayly_returns.sort_index()
dayly_returns.head()
Out[75]:
In [76]:
ax = dayly_returns.ix[:,0:10].plot(title="Daily returns")
ax.set_xlabel("Date")
ax.set_ylabel("Daily return")
plt.show()
Top performance by day and related symbols
In [77]:
dayly_returns.max(axis=1)
Out[77]:
In [78]:
dayly_returns.idxmax(axis=1)
Out[78]:
average / std
In [79]:
dayly_returns.mean(axis=1)
Out[79]:
In [80]:
dayly_returns.std(axis=1)
Out[80]:
In [81]:
def cumulative_returns(df):
return df/df.ix[0,:] - 1
In [82]:
cr = cumulative_returns(pricing)
cr = cr.sort_index()
cr.head()
Out[82]:
In [83]:
cr.max(axis=1)
Out[83]:
In [84]:
cr.idxmax(axis=1)
Out[84]:
Focusing on the last day only, we want to know the ranking.
In [85]:
cr.ix[ (cr.shape[0]-1) ,:].sort_values(ascending=False)
Out[85]:
Best performance
In [86]:
cr.idxmax(axis=1)[cr.idxmax(axis=1).shape[0]-1]
Out[86]:
In [87]:
pricing[cr.idxmax(axis=1)[cr.idxmax(axis=1).shape[0]-1]].plot()
Out[87]:
In [88]:
ax = cr[pd.concat([cr.idxmax(axis=1),cr.idxmin(axis=1)])].plot(title="Cumulative returns")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative return")
ax.legend_.remove()
plt.show()
In [89]:
def sharpe_ratio(df,sample_freq='d',risk_free_rate=0.0):
sr = (df - risk_free_rate).mean() / df.std()
if sample_freq == 'd':
sr = sr * np.sqrt(252)
elif sample_freq == 'w':
sr = sr * np.sqrt(52)
elif sample_freq == 'm':
sr = sr * np.sqrt(12)
else:
raise Exception('unkown sample frequency :'+str(sample_freq))
sr = sr.replace(np.inf, np.nan)
return sr
In [90]:
sr = sharpe_ratio(compute_daily_returns(pricing))
sr.fillna(0)
sr.head()
Out[90]:
In [91]:
sr = sr.sort_values(ascending=False)
sr.head(20)
Out[91]:
In [92]:
pd.concat([sr.head(3),sr.tail(1)]).plot()
Out[92]:
In [93]:
pricing[sr.index[0]].plot()
Out[93]:
In [94]:
pricing[sr.index[1]].plot()
Out[94]:
In [95]:
pricing[sr.index[2]].plot()
Out[95]: