What is the correlation between sharpe ratio and cumulative return?
Let's pick
What is the probability that the top/bottom 3 securities by cumulative return are included in this set?
Let's assume as universe the top 1500 “tradeable” stocks by 200-day average dollar volume, capped at 30% of equities allocated to any single sector. A stock is considered “tradeable” if it meets the following criteria:
In [79]:
from quantopian.interactive.data.sentdex import sentiment
from quantopian.pipeline.filters.morningstar import Q1500US
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
import matplotlib.pyplot as plt
import datetime
import numpy as np
import pandas as pd
In [80]:
def fill_missing_values(df_data):
    """Fill missing values in a data frame, in place.

    Gaps are first forward-filled (each missing value takes the last known
    value above it) and then back-filled, so leading gaps take the first
    known value below them.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Frame to fill; it is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, for call chaining.
    """
    # ffill()/bfill() replace fillna(method=...), which newer pandas deprecates.
    df_data.ffill(inplace=True)
    df_data.bfill(inplace=True)
    return df_data
def compute_daily_returns(df):
    """Compute and return the period-over-period return values.

    Each row is divided by the previous row and 1 is subtracted. The first
    row has no predecessor, so it is set to 0 to keep the result the same
    length as the input.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Price data, one row per period.

    Returns
    -------
    Same type as ``df``
        Returns with the same number of rows as ``df``.
    """
    daily_returns = (df / df.shift(1)) - 1
    # Positional access via .iloc (the old .ix indexer no longer exists in
    # pandas); skip the assignment for empty input, which has no first row.
    if len(daily_returns) > 0:
        daily_returns.iloc[0] = 0
    return daily_returns
def cumulative_returns(df):
    """Return the cumulative return of every row relative to the first row.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Price data, one row per period.

    Returns
    -------
    Same type as ``df``
        ``df / first_row - 1``; the first row is therefore all zeros.
    """
    # .iloc[0] selects the first row by position (.ix is gone from pandas).
    return df / df.iloc[0] - 1
def sharpe_ratio(df,sample_freq='d',risk_free_rate=0.0):
    """Compute the annualized Sharpe ratio of each column of returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Periodic returns, one column per security.
    sample_freq : str
        Sampling frequency of ``df``: ``'d'`` (scaled by sqrt(252)),
        ``'w'`` (sqrt(52)) or ``'m'`` (sqrt(12)).
    risk_free_rate : float
        Per-period risk-free rate subtracted from the returns.

    Returns
    -------
    pandas.Series
        One ratio per column. Columns whose standard deviation is zero
        yield NaN instead of +/- infinity.

    Raises
    ------
    ValueError
        If ``sample_freq`` is not one of ``'d'``, ``'w'``, ``'m'``.
    """
    periods_per_year = {'d': 252, 'w': 52, 'm': 12}
    if sample_freq not in periods_per_year:
        raise ValueError('unknown sample frequency :'+str(sample_freq))
    sr = (df - risk_free_rate).mean() / df.std()
    sr = sr * np.sqrt(periods_per_year[sample_freq])
    # A zero standard deviation produces +inf or -inf depending on the sign
    # of the mean; mask both so they cannot dominate later rankings.
    sr = sr.replace([np.inf, -np.inf], np.nan)
    return sr
In [81]:
# Look-back window length; it is passed to datetime.timedelta as a number of
# days when start_date is computed (90 days here).
DELTA = 30*3
In [82]:
# Start of the window: DELTA days before today, formatted month-day-year.
start_date = "{:%m-%d-%Y}".format(
    datetime.date.today() - datetime.timedelta(days=DELTA)
)
print("start_date", start_date)
In [83]:
# End of the window: today's date, formatted month-day-year.
end_date = "{:%m-%d-%Y}".format(datetime.date.today())
print("end_date", end_date)
In [84]:
def make_pipeline():
    """Build a pipeline whose only screen is the Q1500US filter."""
    return Pipeline(screen=Q1500US())


# Run the pipeline over the [start_date, end_date] window and preview it.
result = run_pipeline(
    pipeline=make_pipeline(),
    start_date=start_date,
    end_date=end_date,
)
result.head()
Out[84]:
In [85]:
# Distinct values of the second index level of the pipeline result
# (presumably the securities of a (date, asset) MultiIndex -- confirm).
assets = result.index.levels[1].unique()
print("# assets:",len(assets))
In [86]:
# Fetch close prices for the selected assets over the window, then fill gaps.
pricing = get_pricing(
    assets,
    start_date=start_date,
    end_date=end_date,
    fields='close_price',
)
pricing = fill_missing_values(pricing)
In [87]:
pricing.head()
Out[87]:
In [88]:
type(pricing)
Out[88]:
In [89]:
pricing = pricing.sort_index()
# Plot only the first ten columns; .iloc selects them by position
# (the .ix indexer used previously has been removed from pandas).
ax = pricing.iloc[:, 0:10].plot(title="Stock Data")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend_.remove()
plt.show()
In [90]:
# Cumulative return of every security relative to the first row, by date.
cr = cumulative_returns(pricing).sort_index()
cr.head()
Out[90]:
In [91]:
cr.max(axis=1).head()
Out[91]:
In [92]:
cr.idxmax(axis=1).head()
Out[92]:
Focusing on the last day only, we want to know the ranking.
In [93]:
# Last row of cr, best cumulative return first (.iloc replaces removed .ix).
cr.iloc[-1].sort_values(ascending=False).head()
Out[93]:
In [94]:
# Last row of cr, showing the worst cumulative returns (.iloc replaces removed .ix).
cr.iloc[-1].sort_values(ascending=False).tail()
Out[94]:
Best performance
In [95]:
# Column label with the highest cumulative return on the last row; .iloc[-1]
# is explicitly positional instead of relying on integer [] lookup.
cr.idxmax(axis=1).iloc[-1]
Out[95]:
In [96]:
# Price history of the column with the highest cumulative return on the last row.
pricing[cr.idxmax(axis=1).iloc[-1]].plot()
Out[96]:
In [97]:
# idxmax/idxmin yield one column label per row, so the same label repeats
# many times; keep each label once so every curve is drawn a single time.
extremes = pd.concat([cr.idxmax(axis=1), cr.idxmin(axis=1)]).drop_duplicates().tolist()
ax = cr[extremes].plot(title="Cumulative returns")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative return")
ax.legend_.remove()
plt.show()
In [112]:
# Sharpe ratio of every column's daily returns.
sr = sharpe_ratio(compute_daily_returns(pricing))
# fillna returns a new Series; assign it back, otherwise the call has no effect.
sr = sr.fillna(0)
sr.head()
Out[112]:
In [113]:
# Rank the Sharpe ratios from highest to lowest and preview the top entries.
sr = sr.sort_values(ascending=False)
sr.head()
Out[113]:
In [114]:
pd.concat([sr.head(3),sr.tail(1)]).plot()
Out[114]:
In [115]:
pricing[sr.index[0]].plot()
Out[115]:
In [116]:
pricing[sr.index[1]].plot()
Out[116]:
In [117]:
pricing[sr.index[2]].plot()
Out[117]:
In [118]:
# cr_last is not defined anywhere in the visible notebook; derive it here as
# the last row of cr (assumed intent: cumulative return on the final day).
cr_last = cr.iloc[-1]
# Align Sharpe ratio and final cumulative return side by side per security.
merge = pd.concat([sr, cr_last], axis=1)
merge.columns = ['sharpe_ratio', 'cumulative_return']
merge.head()
Out[118]:
In [119]:
merge.plot(kind='scatter' , x='sharpe_ratio', y='cumulative_return' , s=50)
Out[119]:
A clear correlation between sharpe_ratio and cumulative_return can be observed.
In [136]:
merge['sharpe_ratio'].corr(merge['cumulative_return'])
Out[136]:
In [120]:
merge.sort_values(by='sharpe_ratio',ascending=False).head(20)
Out[120]:
In [124]:
sr.mean() , sr.std()
Out[124]:
In [121]:
merge.sort_values(by='cumulative_return',ascending=False).head(20)
Out[121]:
In [125]:
cr_last.mean() , cr_last.std()
Out[125]:
The second-best security by cumulative return is the best security by Sharpe ratio. Is it a good candidate for a long position?
In [131]:
merge.sort_values(by='sharpe_ratio',ascending=False).index[0]
Out[131]:
In [130]:
pricing[merge.sort_values(by='sharpe_ratio',ascending=False).index[0]].plot()
Out[130]:
Let's compare this with the best security by cumulative return
In [132]:
merge.sort_values(by='cumulative_return',ascending=False).index[0]
Out[132]:
In [134]:
pricing[merge.sort_values(by='cumulative_return',ascending=False).index[0]].plot()
Out[134]:
In [141]:
# Both values are passed to datetime.timedelta as day counts:
# DELTA sets start_date (60 days back), DELTA_FROM sets from_date (30 days back).
DELTA = 30*2
DELTA_FROM = 30
In [147]:
# Start of the window: DELTA days before today, formatted month-day-year.
start_date = "{:%m-%d-%Y}".format(
    datetime.date.today() - datetime.timedelta(days=DELTA)
)
print("start_date", start_date)
In [148]:
# End of the window: today's date, formatted month-day-year.
end_date = "{:%m-%d-%Y}".format(datetime.date.today())
print("end_date", end_date)
In [149]:
# Split point between the two sub-periods: DELTA_FROM days before today.
from_date = "{:%m-%d-%Y}".format(
    datetime.date.today() - datetime.timedelta(days=DELTA_FROM)
)
print("from_date", from_date)
In [150]:
def make_pipeline():
    """Build a pipeline whose only screen is the Q1500US filter."""
    return Pipeline(screen=Q1500US())


# Re-run the pipeline over the new [start_date, end_date] window and preview it.
result = run_pipeline(
    pipeline=make_pipeline(),
    start_date=start_date,
    end_date=end_date,
)
result.head()
Out[150]:
In [151]:
# Refresh the asset list from the pipeline result of this window first, so
# prices are not fetched for the stale asset list left over from the earlier run.
assets = result.index.levels[1].unique()
pricing = fill_missing_values(
    get_pricing(assets, start_date=start_date, end_date=end_date, fields='close_price')
)
In [152]:
pricing.head()
Out[152]:
In [154]:
# Distinct values of the second index level of the latest pipeline result
# (presumably the securities of a (date, asset) MultiIndex -- confirm).
assets = result.index.levels[1].unique()
print("# assets:",len(assets))
In [163]:
# Boolean row masks splitting pricing at from_date: rows before it and rows
# on/after it. NOTE(review): from_date is a "%m-%d-%Y" string, so this relies
# on pandas parsing it when comparing against the index -- confirm.
mask_1 = pricing.index < from_date
mask_2 = pricing.index >= from_date
In [166]:
pricing_1 = pricing[mask_1]
pricing_1.tail()
Out[166]:
In [167]:
pricing_2 = pricing[mask_2]
pricing_2.head()
Out[167]:
In [189]:
# First sub-period (rows before from_date): cumulative returns by date and
# Sharpe ratios ranked from highest to lowest.
cr1 = cumulative_returns(pricing_1)
cr1 = cr1.fillna(value=0)
cr1 = cr1.sort_index()
sr1 = sharpe_ratio(compute_daily_returns(pricing_1))
sr1 = sr1.fillna(0)
sr1 = sr1.sort_values(ascending=False)
# Second sub-period (rows on/after from_date): same metrics.
cr2 = cumulative_returns(pricing_2)
cr2 = cr2.fillna(value=0)
cr2 = cr2.sort_index()
sr2 = sharpe_ratio(compute_daily_returns(pricing_2))
# Fill sr2 itself; filling sr1 here would overwrite the second period's
# Sharpe ratios with the first period's.
sr2 = sr2.fillna(0)
sr2 = sr2.sort_values(ascending=False)
In [190]:
# Ten best cumulative returns on the last row of cr1 (.iloc replaces removed .ix).
cr1.iloc[-1].sort_values(ascending=False).head(10)
Out[190]:
In [192]:
sr1.head(10)
Out[192]:
In [178]:
# Ten worst cumulative returns on the last row of cr1 (.iloc replaces removed .ix).
cr1.iloc[-1].sort_values(ascending=False).tail(10)
Out[178]:
In [193]:
sr1.tail(10)
Out[193]:
In [238]:
# Last-row cumulative returns of cr1, best first; sorted once and reused
# (.iloc replaces the removed .ix indexer).
cr1_last_sorted = cr1.iloc[-1].sort_values(ascending=False)
# Union of the 10 best and 10 worst by cumulative return and by Sharpe ratio.
pick_stock = (
    set(cr1_last_sorted.head(10).index)
    | set(cr1_last_sorted.tail(10).index)
    | set(sr1.head(10).index)
    | set(sr1.tail(10).index)
)
len(pick_stock)
Out[238]:
In [181]:
# Ten best cumulative returns on the last row of cr2 (.iloc replaces removed .ix).
cr2.iloc[-1].sort_values(ascending=False).head(10)
Out[181]:
In [182]:
# Ten worst cumulative returns on the last row of cr2 (.iloc replaces removed .ix).
cr2.iloc[-1].sort_values(ascending=False).tail(10)
Out[182]:
In [241]:
# Last-row cumulative returns of cr2, best first (.iloc replaces removed .ix).
cr2_last_sorted = cr2.iloc[-1].sort_values(ascending=False)
# The 10 best and 10 worst securities of the second sub-period.
top_stock = set(cr2_last_sorted.head(10).index) | set(cr2_last_sorted.tail(10).index)
len(top_stock)
Out[241]:
In [243]:
len(top_stock.intersection(pick_stock))
Out[243]:
In [246]:
# Fraction of top_stock members that are also in pick_stock.
np.float32(len(top_stock.intersection(pick_stock))) / np.float32(len(top_stock))
Out[246]:
The interesting thing is that the 4 picked stocks that made the final list come from the cumulative return list rather than, as expected, from the Sharpe ratio list.
In [274]:
# Long candidates: the 10 best by last-row cumulative return in cr1 plus the
# 10 highest Sharpe ratios in sr1 (.iloc replaces the removed .ix indexer).
pick_stock_long = (
    set(cr1.iloc[-1].sort_values(ascending=False).head(10).index)
    | set(sr1.head(10).index)
)
# Cumulative returns of those candidates over pricing_2; keep the last row.
cr = cumulative_returns(pricing_2[list(pick_stock_long)])
cr = cr.fillna(value=0)
cr = cr.sort_index()
crn = cr[-1:]
crn
Out[274]:
In [275]:
crn.mean(axis=1)
Out[275]:
Whereas in the previous month it was
In [281]:
# Mean last-row cumulative return of the long candidates over pricing_1.
cr = cumulative_returns(pricing_1[list(pick_stock_long)]).fillna(value=0).sort_index()
crn = cr.tail(1)
crn.mean(axis=1)
Out[281]:
We switch from a situation like this
In [284]:
# Cumulative-return curves of the long candidates over pricing_1.
cr0 = cumulative_returns(pricing_1[list(pick_stock_long)]).fillna(value=0).sort_index()
ax = cr0.plot(title="Cumulative returns")
ax.set(xlabel="Date", ylabel="Cumulative return")
ax.get_legend().remove()
plt.show()
to a situation like this
In [285]:
# Cumulative-return curves of the long candidates over pricing_2.
cr = cumulative_returns(pricing_2[list(pick_stock_long)]).fillna(value=0).sort_index()
ax = cr.plot(title="Cumulative returns")
ax.set(xlabel="Date", ylabel="Cumulative return")
ax.get_legend().remove()
plt.show()
In [278]:
# Short candidates: the 10 worst by last-row cumulative return in cr1 plus the
# 10 lowest Sharpe ratios in sr1 (.iloc replaces the removed .ix indexer).
pick_stock_short = (
    set(cr1.iloc[-1].sort_values(ascending=False).tail(10).index)
    | set(sr1.tail(10).index)
)
# Cumulative returns of those candidates over pricing_2; keep the last row.
cr = cumulative_returns(pricing_2[list(pick_stock_short)])
cr = cr.fillna(value=0)
cr = cr.sort_index()
crn = cr[-1:]
crn
Out[278]:
In [279]:
crn.mean(axis=1)
Out[279]:
Whereas in the previous month it was
In [282]:
# Mean last-row cumulative return of the short candidates over pricing_1.
cr = cumulative_returns(pricing_1[list(pick_stock_short)]).fillna(value=0).sort_index()
crn = cr.tail(1)
crn.mean(axis=1)
Out[282]:
We switch from a situation like this
In [286]:
# Cumulative-return curves of the short candidates over pricing_1.
cr0 = cumulative_returns(pricing_1[list(pick_stock_short)]).fillna(value=0).sort_index()
ax = cr0.plot(title="Cumulative returns")
ax.set(xlabel="Date", ylabel="Cumulative return")
ax.get_legend().remove()
plt.show()
to a situation like this
In [287]:
# Cumulative-return curves of the short candidates over pricing_2.
cr = cumulative_returns(pricing_2[list(pick_stock_short)]).fillna(value=0).sort_index()
ax = cr.plot(title="Cumulative returns")
ax.set(xlabel="Date", ylabel="Cumulative return")
ax.get_legend().remove()
plt.show()