In [249]:
import numpy as np
import pandas as pd
import pymc as pm
from matplotlib import pyplot as plt
import scipy.stats as stats
import seaborn as sns
%matplotlib inline
In [34]:
g = pm.Gamma("g", 1, 1)
samples = np.array([g.random() for _ in xrange(1000)])
In [38]:
plt.figure(figsize = (12, 6))
sns.distplot(samples, kde = False);
In [39]:
plt.figure(figsize = (12, 6))
n = 4
for i in range(10):
ax = plt.subplot(2, 5, i + 1)
if i >= 5:
n = 15
plt.imshow(pm.rwishart(n + 1, np.eye(n)), interpolation = "none", cmap = plt.cm.hot)
ax.axis("off")
plt.suptitle("Random matrices from a Wishart Distribution");
In [46]:
plt.figure(figsize = (12.5, 5))
params = [(2, 5), (1, 1), (0.5, 0.5), (5, 5), (20, 4), (5, 1)]
x = np.linspace(0, 1, 100)
beta = stats.beta
with sns.axes_style("whitegrid"):
for a, b in params:
y = beta.pdf(x, a, b)
lines = plt.plot(x, y, label = "(%.1f, %.1f)" % (a, b), lw = 3)
plt.fill_between(x, 0, y, alpha = 0.2, color = lines[0].get_color())
plt.autoscale(tight = True)
plt.ylim(0)
plt.legend(loc = 'upper left', title = '(a, b) parameters')
In [90]:
from pymc import rbeta
rand = np.random.rand
class Bandits(object):
def __init__(self, p_array):
self.p = p_array
self.optimal = max(p_array)
def __len__(self):
return len(self.p)
def pull(self, i):
return rand() < self.p[i]
class BayesianStrategy(object):
def __init__(self, bandits):
nbandits = len(bandits)
self.bandits = bandits
self.N = 0
self.trials = np.zeros(nbandits)
self.wins = np.zeros(nbandits)
def run(self, n = 100):
for _ in xrange(n):
sample_ps = beta.rvs(1 + self.wins, 1 + self.trials - self.trials)
i = np.argmax(sample_ps)
if self.bandits.pull(i):
self.wins[i] += 1
self.trials[i] += 1
self.N += 1
return
In [105]:
bandits = Bandits([0.1, 0.15, 0.18])
bs = BayesianStrategy(bandits)
beta = stats.beta
x = np.linspace(0.001, 0.999, 200)
def plot_priors(bs, prob, lw = 3, alpha = 0.2, plt_vlines = True):
wins = bs.wins
trials = bs.trials
for i in range(len(prob)):
y = beta(1 + wins[i], 1 + trials[i] - wins[i])
p = plt.plot(x, y.pdf(x), lw = lw)
c = p[0].get_markeredgecolor()
plt.fill_between(x, 0, y.pdf(x), color = c, alpha = alpha,
label = "Underlying probability: %.2f" %prob[i])
if plt_vlines:
plt.vlines(prob[i], 0, y.pdf(prob[i]),
linestyles = "--", lw = 2)
plt.autoscale(tight = True)
plt.title("Posterior after %d pull" %bs.N + "s" * (bs.N > 1))
plt.autoscale(tight = True)
return
In [107]:
probs = [0.85, 0.60, 0.75]
bandits = Bandits(probs)
bs = BayesianStrategy(bandits)
plt.figure(figsize = (15, 15));
sns.set_style("whitegrid")
sample_sizes = [1, 1, 3, 10, 10, 25, 50, 100, 200, 600]
for j, i in enumerate(sample_sizes):
plt.subplot(5, 2, j + 1)
bs.run(i)
plot_priors(bs, probs)
plt.autoscale(tight = True)
plt.tight_layout;
In [251]:
## Stock returns
colors = ["#348ABD", "#A60628", "#7A68A6", "#467821"]
normal = stats.norm
expert_prior_params = {"AAPL": (0.05, 0.03),
"GOOG": (-0.03, 0.04),
"TSLA": (-0.02, 0.01),
"AMZN": (0.03, 0.02)}
plt.figure(figsize = (14, 7))
sns.set_style("whitegrid")
x = np.linspace(-0.15, 0.15, 100)
for i, (name, (mu, sigma)) in enumerate(expert_prior_params.iteritems()):
plt.subplot(2, 2, i + 1)
y = normal.pdf(x, mu, sigma)
plt.plot(x, y, lw = 2, color = colors[i])
plt.fill_between(x, 0, y, color = colors[i], alpha = 0.2)
plt.title("%s prior" %name)
plt.vlines(0, 0, y.max(), "k", linestyles = "--", lw = 0.5)
plt.xlim(-0.15, 0.15)
In [261]:
n_observations = 100
prior_mu, prior_sigma = map(np.array, zip(*expert_prior_params.values()))
inv_cov_matrix = pm.Wishart("inv_cov_matrix", n_observations, np.eye(4))
returns = pm.Normal("returns", prior_mu, prior_sigma)
In [262]:
# I wish I could have used Pandas as a prereq for this book, but oh well.
import datetime
import ystockquote as ysq
stocks = ["AAPL", "GOOG", "TSLA", "AMZN"]
enddate = datetime.datetime.now().strftime("%Y-%m-%d") # today's date.
startdate = "2012-09-01"
stock_closes = {}
stock_returns = {}
for stock in stocks:
x = sorted(ysq.get_historical_prices(stock, startdate, enddate).items(), key = lambda (d, _): d)[::-1]
stock_closes[stock] = np.array(map(lambda s: s[1]['Close'], x)).astype(float)
# create returns:
for stock in stocks:
_previous_day = np.roll(stock_closes[stock], -1)
stock_returns[stock] = ((stock_closes[stock] - _previous_day) / _previous_day)[:n_observations]
dates = map(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"), (s[0] for s in x[:n_observations]))
In [263]:
plt.figure(figsize = (12.5, 4))
for _stock, _returns in stock_returns.iteritems():
p = plt.plot((1 + _returns)[::-1].cumprod() - 1, '-o', label = _stock,
markersize = 4, markeredgecolor = "none")
plt.xticks(np.arange(100)[::-8],
map(lambda x: datetime.datetime.strftime(x, "%Y-%m-%d"), dates[::-8]),
rotation = 60)
plt.legend(loc = "upper left")
plt.title("Return space")
plt.ylabel("return of $1 on first date, x100%");
In [264]:
sns.set_style("whitegrid")
plt.figure(figsize = (14, 7))
for i, (_stock, _returns) in enumerate(stock_returns.iteritems()):
plt.subplot(2, 2, i + 1)
plt.hist(_returns, histtype = "stepfilled", bins = 20,
color = colors[i], label = "Daily returns for %s" %_stock)
plt.xlim(-0.15, 0.15)
plt.legend()
plt.tight_layout()
In [265]:
obs_returns = np.vstack(stock_returns.values()).T
obs = pm.MvNormal("observed_returns", returns, inv_cov_matrix, value = obs_returns, observed = True)
model = pm.Model([returns, inv_cov_matrix, obs])
mcmc = pm.MCMC(model)
mcmc.sample(150000, 100000, 3)
In [267]:
plt.figure(figsize = (13.5, 5))
mu_samples = mcmc.trace("returns")[:]
for i in range(4):
plt.hist(mu_samples[:, i], alpha=0.8 - 0.05 * i, bins=30,
histtype="stepfilled", normed=True,
label="%s" % stock_returns.keys()[i])
plt.vlines(mu_samples.mean(axis=0), 0, 100, linestyle="--", linewidth=.5)
plt.title("Posterior distribution of $\mu$, daily stock returns")
plt.legend();
In [268]:
print obs_returns.mean(axis = 0)
print mu_samples.mean(axis = 0)
In [269]:
expert_prior_params
Out[269]:
In [ ]: