In [78]:
import pandas as pd
import pandas.io.data as web
import matplotlib.pyplot as plt
import datetime as datetime
from numpy import *
%matplotlib inline
Set start date, end date and data source ('Yahoo Finance', 'Google Finance', etc.). Download S&P 500 index data from Yahoo Finance.
In [79]:
start_date = datetime.date(1976,1,1)
end_date = datetime.date(2017,1,1)
# Download S&P 500 index data
try:
SnP500_Ddata = web.DataReader('^GSPC','yahoo',start_date,end_date)
except:
SnP500_Ddata = pd.read_csv("http://analytics.romanko.ca/data/SP500_hist.csv")
SnP500_Ddata.index = pd.to_datetime(SnP500_Ddata.Date)
SnP500_Ddata.head()
Out[79]:
In [80]:
# Create a time-series of annual data points from daily data
SnP500_Adata = SnP500_Ddata.resample('A').last()
SnP500_Adata[['Volume','Adj Close']].tail()
Out[80]:
In [81]:
SnP500_Adata[['Adj Close']] = SnP500_Adata[['Adj Close']].apply(pd.to_numeric, errors='ignore')
SnP500_Adata['returns'] = SnP500_Adata['Adj Close'] / SnP500_Adata['Adj Close'].shift(1) -1
SnP500_Adata = SnP500_Adata.dropna()
print SnP500_Adata['returns']
In [82]:
SnP500_mean_ret = float(SnP500_Adata[['returns']].mean())
SnP500_std_ret = float(SnP500_Adata[['returns']].std())
print ("S&P 500 average return = %g%%, st. dev = %g%%") % (100*SnP500_mean_ret, 100*SnP500_std_ret)
We want to invest \$1000 in the US stock market for 1 year: $v_0 = 1000$
In [83]:
v0 = 1000 # Initial capital
In our example we assume that the return of the market over the next year follow Normal distribution.
Between 1977 and 2014, S&P 500 returned 9.38% per year on average with a standard deviation of 16.15%.
Generate 100 scenarios for the market return over the next year (draw 100 random numbers from a Normal distribution with mean 9.38% and standard deviation of 16.15%):
In [84]:
Ns = 100 # Number of scenarios
r01 = random.normal(SnP500_mean_ret, SnP500_std_ret, Ns)
r01
Out[84]:
Value of investment at the end of year 1: $v_1 = v_0 + r_{0,1}\cdot v_0 = (1 + r_{0,1})\cdot v_0$
In [85]:
# Distribution of value at the end of year 1
v1 = (r01 + 1) * v0
v1
Out[85]:
Mean:
In [86]:
mean(v1)
Out[86]:
Standard deviation:
In [87]:
std(v1)
Out[87]:
Minimum, maximum:
In [88]:
min(v1), max(v1)
Out[88]:
Persentiles
5th percentile, median, 95th percentile:
In [89]:
percentile(v1, [5, 50,95])
Out[89]:
Alternative way to compute percentiles
5th percentile, median, 95th percentile:
In [90]:
sortedScen = sorted(v1) # Sort scenarios
sortedScen[5-1], sortedScen[50-1], sortedScen[95-1]
Out[90]:
Plot a histogram of the distribution of outcomes for v1:
In [91]:
hist, bins = histogram(v1)
positions = (bins[:-1] + bins[1:]) / 2
plt.bar(positions, hist, width=60)
plt.xlabel('portfolio value after 1 year')
plt.ylabel('frequency')
plt.show()
Simulated paths over time:
In [92]:
# Plot simulated paths over time
for res in v1:
plt.plot((0,1), (v0, res))
plt.xlabel('time step')
plt.ylabel('portfolio value')
plt.show()
You are planning for retirement and decide to invest in the market for the next 30 years (instead of only the next year as in example 1).
Assume that every year your investment returns from investing into the S&P 500 will follow a Normal distribution with the mean and standard deviation as in example 1.
Your initial capital is still \$1000
In [93]:
v0 = 1000 # Initial capital
Between 1977 and 2014, S&P 500 returned 9.38% per year on average with a standard deviation of 16.15% Simulate 30 columns of 100 observations each of single period returns:
In [94]:
r_speriod30 = random.normal(SnP500_mean_ret, SnP500_std_ret, (Ns, 30))
r_speriod30
Out[94]:
Compute and plot $v_{30}$
In [95]:
v30 = prod(1 + r_speriod30 , 1) * v0
hist, bins = histogram(v30)
positions = (bins[:-1] + bins[1:]) / 2
width = (bins[1] - bins[0]) * 0.8
plt.bar(positions, hist, width=width)
plt.xlabel('portfolio value after 30 years')
plt.ylabel('frequency')
plt.show()
Simulated paths over time:
In [96]:
for scenario in r_speriod30:
y = [prod(1 + scenario[0:i]) * v0 for i in range(0,31)]
plt.plot(range(0,31), y)
plt.xlabel('time step')
plt.ylabel('portfolio value')
plt.show()
In [97]:
# Download 3-month T-bill rates from Federal Reserve
start_date_b = datetime.date(1977,1,1)
end_date_b = datetime.date(2017,1,1)
TBill_Ddata = web.DataReader('DTB3','fred',start_date_b,end_date_b)
TBill_Ddata.head()
Out[97]:
In [98]:
# Create a time-series of annual data points from daily data
TBill_Adata = TBill_Ddata.resample('A').last()
TBill_Adata[['DTB3']].tail()
Out[98]:
In [99]:
TBill_Adata['returns'] = TBill_Adata['DTB3'] / 100
TBill_Adata = TBill_Adata.dropna()
print TBill_Adata['returns']
In [100]:
TBill_mean_ret = float(TBill_Adata[['returns']].mean())
TBill_std_ret = float(TBill_Adata[['returns']].std())
print ("T-bill average return = %g%%, st. dev = %g%%") % (100*TBill_mean_ret, 100*TBill_std_ret)
In [101]:
covMat = cov(array(SnP500_Adata[['returns']]),array(TBill_Adata[['returns']]),rowvar=0)
covMat
Out[101]:
In [102]:
v0 = 1000 # Initial capital
Ns = 5000 # Number of scenarios
In [103]:
mu = [SnP500_mean_ret, TBill_mean_ret] # Expected return
mu
Out[103]:
In [104]:
stockRet = ones(Ns)
bondsRet = ones(Ns)
In [105]:
scenarios = random.multivariate_normal(mu, covMat, Ns)
for year in range(1, 31):
scenarios = random.multivariate_normal(mu, covMat, Ns)
stockRet *= (1 + scenarios[:,0])
bondsRet *= (1 + scenarios[:,1])
In [106]:
v30 = 0.5 * v0 * stockRet + 0.5 * v0 * bondsRet
In [107]:
hist, bins = histogram(v30, bins = 100)
positions = (bins[:-1] + bins[1:]) / 2
width = (bins[1] - bins[0]) * 0.8
plt.bar(positions, hist, width=width)
plt.xlabel('portfolio value after 30 years')
plt.ylabel('frequency')
plt.show()
In [108]:
# Compute portfolios by iterating through different combinations of weights
v30comp = []
for w in arange(0.2, 1.01, 0.2):
v30comp += [w * v0 * stockRet + (1 - w) * v0 * bondsRet]
In [109]:
# Plot a histogram of the distribution of
# differences in outcomes for v30
# (Stratery 4 - Strategy 2)
v30d = v30comp[3] - v30comp[1]
In [110]:
hist, bins = histogram(v30d, bins = 50)
positions = (bins[:-1] + bins[1:]) / 2
width = (bins[1] - bins[0]) * 0.8
plt.bar(positions, hist, width=width)
plt.show()
In [111]:
# Compute number of elements in v30d that are > 0 and < 0 and compare
pos_count = (v30d > 0).sum()
neg_count = (v30d <= 0).sum()
print u"""Strategy 4 was better in %d cases
Strategy 2 was better in %d cases
Difference = %d""" % (pos_count, neg_count, pos_count - neg_count)