notebook.community

Edit and run



In [249]:

    
import numpy as np
import pandas as pd
import pymc as pm
from matplotlib import pyplot as plt
import scipy.stats as stats
import seaborn as sns
%matplotlib inline



In [34]:

    
g = pm.Gamma("g", 1, 1)
samples = np.array([g.random() for _ in xrange(1000)])



In [38]:

    
plt.figure(figsize = (12, 6))
sns.distplot(samples, kde = False);



In [39]:

    
plt.figure(figsize = (12, 6))
n = 4
for i in range(10):
    ax = plt.subplot(2, 5, i + 1)
    if i >= 5:
        n = 15
    plt.imshow(pm.rwishart(n + 1, np.eye(n)), interpolation = "none", cmap = plt.cm.hot)
    ax.axis("off")
plt.suptitle("Random matrices from a Wishart Distribution");



In [46]:

    
plt.figure(figsize = (12.5, 5))
params = [(2, 5), (1, 1), (0.5, 0.5), (5, 5), (20, 4), (5, 1)]

x = np.linspace(0, 1, 100)
beta = stats.beta

with sns.axes_style("whitegrid"):
    for a, b in params:
        y = beta.pdf(x, a, b)
        lines = plt.plot(x, y, label = "(%.1f, %.1f)" % (a, b), lw = 3)
        plt.fill_between(x, 0, y, alpha = 0.2, color = lines[0].get_color())
        plt.autoscale(tight = True)
    plt.ylim(0)
    plt.legend(loc = 'upper left', title = '(a, b) parameters')



In [90]:

    
from pymc import rbeta

rand = np.random.rand
class Bandits(object):
    def __init__(self, p_array):
        self.p = p_array
        self.optimal = max(p_array)
    
    def __len__(self):
        return len(self.p)
    
    def pull(self, i):
        return rand() < self.p[i]
        

class BayesianStrategy(object):
    def __init__(self, bandits):
        nbandits = len(bandits)
        self.bandits = bandits
        self.N = 0
        self.trials = np.zeros(nbandits)
        self.wins = np.zeros(nbandits)
    
    def run(self, n = 100):
        for _ in xrange(n):
            sample_ps = beta.rvs(1 + self.wins, 1 + self.trials - self.trials)
            i = np.argmax(sample_ps)
            if self.bandits.pull(i):
                self.wins[i] += 1
            self.trials[i] += 1
            self.N += 1
        return



In [105]:

    
bandits = Bandits([0.1, 0.15, 0.18])
bs = BayesianStrategy(bandits)

beta = stats.beta
x = np.linspace(0.001, 0.999, 200)

def plot_priors(bs, prob, lw = 3, alpha = 0.2, plt_vlines = True):
    wins = bs.wins
    trials = bs.trials
    for i in range(len(prob)):
        y = beta(1 + wins[i], 1 + trials[i] - wins[i])
        p = plt.plot(x, y.pdf(x), lw = lw)
        c = p[0].get_markeredgecolor()
        plt.fill_between(x, 0, y.pdf(x), color = c, alpha = alpha,
                         label = "Underlying probability: %.2f" %prob[i])
        if plt_vlines:
            plt.vlines(prob[i], 0, y.pdf(prob[i]), 
                       linestyles = "--", lw = 2)
        plt.autoscale(tight = True)
        plt.title("Posterior after %d pull" %bs.N + "s" * (bs.N > 1))
        plt.autoscale(tight = True)
    return



In [107]:

    
probs = [0.85, 0.60, 0.75]
bandits = Bandits(probs)
bs = BayesianStrategy(bandits)

plt.figure(figsize = (15, 15));
sns.set_style("whitegrid")
sample_sizes = [1, 1, 3, 10, 10, 25, 50, 100, 200, 600]
for j, i in enumerate(sample_sizes):
    plt.subplot(5, 2, j + 1)
    bs.run(i)
    plot_priors(bs, probs)
    plt.autoscale(tight = True)
plt.tight_layout;



In [251]:

    
## Stock returns
colors = ["#348ABD", "#A60628", "#7A68A6", "#467821"]
normal = stats.norm

expert_prior_params = {"AAPL": (0.05, 0.03),
                       "GOOG": (-0.03, 0.04),
                       "TSLA": (-0.02, 0.01),
                       "AMZN": (0.03, 0.02)}

plt.figure(figsize = (14, 7))
sns.set_style("whitegrid")
x = np.linspace(-0.15, 0.15, 100)
for i, (name, (mu, sigma)) in enumerate(expert_prior_params.iteritems()):
    plt.subplot(2, 2, i + 1)
    y = normal.pdf(x, mu, sigma)
    plt.plot(x, y, lw = 2, color = colors[i])
    plt.fill_between(x, 0, y, color = colors[i], alpha = 0.2)
    plt.title("%s prior" %name)
    plt.vlines(0, 0, y.max(), "k", linestyles = "--", lw = 0.5)
    plt.xlim(-0.15, 0.15)



In [261]:

    
n_observations = 100

prior_mu, prior_sigma = map(np.array, zip(*expert_prior_params.values()))

inv_cov_matrix = pm.Wishart("inv_cov_matrix", n_observations, np.eye(4))
returns = pm.Normal("returns", prior_mu, prior_sigma)



In [262]:

    
# I wish I could have used Pandas as a prereq for this book, but oh well.
import datetime
import ystockquote as ysq

stocks = ["AAPL", "GOOG", "TSLA", "AMZN"]

enddate = datetime.datetime.now().strftime("%Y-%m-%d")  # today's date.
startdate = "2012-09-01"

stock_closes = {}
stock_returns = {}

for stock in stocks:
    x = sorted(ysq.get_historical_prices(stock, startdate, enddate).items(), key = lambda (d, _): d)[::-1]
    stock_closes[stock] = np.array(map(lambda s: s[1]['Close'], x)).astype(float)

# create returns:

for stock in stocks:
    _previous_day = np.roll(stock_closes[stock], -1)
    stock_returns[stock] = ((stock_closes[stock] - _previous_day) / _previous_day)[:n_observations]

dates = map(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"), (s[0] for s in x[:n_observations]))



In [263]:

    
plt.figure(figsize = (12.5, 4))

for _stock, _returns in stock_returns.iteritems():
    p = plt.plot((1 + _returns)[::-1].cumprod() - 1, '-o', label = _stock,
                 markersize = 4, markeredgecolor = "none")

plt.xticks(np.arange(100)[::-8],
           map(lambda x: datetime.datetime.strftime(x, "%Y-%m-%d"), dates[::-8]),
           rotation = 60)

plt.legend(loc = "upper left")
plt.title("Return space")
plt.ylabel("return of $1 on first date, x100%");



In [264]:

    
sns.set_style("whitegrid")
plt.figure(figsize = (14, 7))

for i, (_stock, _returns) in enumerate(stock_returns.iteritems()):
    plt.subplot(2, 2, i + 1)
    plt.hist(_returns, histtype = "stepfilled", bins = 20, 
             color = colors[i], label = "Daily returns for %s" %_stock)
    plt.xlim(-0.15, 0.15)
    plt.legend()

plt.tight_layout()



In [265]:

    
obs_returns = np.vstack(stock_returns.values()).T
obs = pm.MvNormal("observed_returns", returns, inv_cov_matrix, value = obs_returns, observed = True)
model = pm.Model([returns, inv_cov_matrix, obs])
mcmc = pm.MCMC(model)
mcmc.sample(150000, 100000, 3)









    



 [-----------------100%-----------------] 150000 of 150000 complete in 94.9 sec



In [267]:

    
plt.figure(figsize = (13.5, 5))

mu_samples = mcmc.trace("returns")[:]

for i in range(4):
    plt.hist(mu_samples[:, i], alpha=0.8 - 0.05 * i, bins=30,
             histtype="stepfilled", normed=True,
             label="%s" % stock_returns.keys()[i])

plt.vlines(mu_samples.mean(axis=0), 0, 100, linestyle="--", linewidth=.5)

plt.title("Posterior distribution of $\mu$, daily stock returns")
plt.legend();



In [268]:

    
print obs_returns.mean(axis = 0)
print mu_samples.mean(axis = 0)









    



[-0.00088836  0.0018774   0.00059402 -0.00290232]
[-0.00108514  0.00184024  0.00067226 -0.00290865]



In [269]:

    
expert_prior_params









    Out[269]:





{'AAPL': (0.05, 0.03),
 'AMZN': (0.03, 0.02),
 'GOOG': (-0.03, 0.04),
 'TSLA': (-0.02, 0.01)}



In [ ]: