To replace the data in chapter 1 with this real time data.
In [6]:
from requests import get
response = get('https://api.github.com/repos/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/stats/commit_activity').json()
weekly_totals =np.array( map( lambda x: x['total'], response ) )
weekly_totals = weekly_totals[ np.where( weekly_totals )[0] ] #gives me 52 weeks, but project started < 1 year ago so it backwards fills with 0s
In [27]:
count_data = weekly_totals
n_count_data = len(weekly_totals)
plt.bar( range(n_count_data), weekly_totals );
print weekly_totals
print n_count_data
In [12]:
import pymc as pm
alpha = 1.0/count_data.mean() # Recall count_data is the
# variable that holds our txt counts
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)
tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)
In [13]:
@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
out = np.zeros(n_count_data)
out[:tau] = lambda_1 # lambda before tau is lambda1
out[tau:] = lambda_2 # lambda after tau is lambda2
return out
In [14]:
observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
model = pm.Model([observation, lambda_1, lambda_2, tau])
In [15]:
### Mysterious code to be explained in Chapter 3.
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000, 1)
In [16]:
lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]
tau_samples = mcmc.trace('tau')[:]
In [29]:
figsize(12.5, 10)
#histogram of the samples:
ax = plt.subplot(311)
ax.set_autoscaley_on(False)
plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
label="posterior of $\lambda_1$", color="#A60628", normed=True)
plt.legend(loc="upper left")
plt.title(r"Posterior distributions of the variables \
$\lambda_1,\;\lambda_2,\;\tau$")
plt.xlabel("$\lambda_1$ value")
ax = plt.subplot(312)
ax.set_autoscaley_on(False)
plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
label="posterior of $\lambda_2$", color="#7A68A6", normed=True)
plt.legend(loc="upper left")
plt.xlabel("$\lambda_2$ value")
plt.subplot(313)
#w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
plt.hist(tau_samples, bins=n_count_data, alpha=1,
label=r"posterior of $\tau$",
color="#467821", rwidth=2.)
plt.xticks(np.arange(n_count_data))
plt.legend(loc="upper left")
plt.ylim([0, .75])
plt.xlabel("$\tau$ (in days)")
plt.ylabel("probability")
Out[29]:
In [21]:
n_count_data
Out[21]:
In [28]:
lambda_2_samples
Out[28]:
In [ ]: