In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# sns.set(rc={"figure.figsize": (16, 12)})
sns.set_style('white')
sns.set_style('ticks')
sns.set_context("paper")

%config InlineBackend.figure_format = 'retina'
# %qtconsole --colors=linux

import numpy as np
import pymc3 as pm
import theano.tensor as tt 
import scipy
from scipy import stats

# Convenience functions to time execution (and display start time)
class timeit():
    from datetime import datetime
    def __enter__(self):
        self.tic = self.datetime.now()
        print('Start: {}'.format(self.tic) )
    def __exit__(self, *args, **kwargs):
        print('Runtime: {}'.format(self.datetime.now() - self.tic))

# from pymc3.distributions.timeseries import EulerMaruyama

In [ ]:
# %qtconsole

In [ ]:
T = 2
dt = 0.1
nsteps = int(T / dt)  # number of integration steps

amplitude = 20
phi  = 0.0
f = 0.5

time = np.arange(0, T+dt, dt)

def true_control(t, amplitude, phi=0): 
    return(amplitude * np.sin(2 * np.pi * f * t + phi))

alpha = true_control(time, amplitude=20, phi=0.0)

x = np.zeros((nsteps+1, 2))
y = np.zeros((nsteps+1, 1))

mvnorm = stats.multivariate_normal
covar = np.diag([0.2, 0.1])
sigma_y = 1.7

x[0] = np.zeros(2)

A = np.array([[1, dt], [0, 1]])
B = np.array([0.5 * dt**2, dt])

# x[:, 0] is the position
# x[:, 1] is the velocity

# Written without matrix notation:
# for t in range(1, nsteps+1):
#     x[t, 0] = x[t-1, 0] + dt*x[t-1, 1] + 0.5 * dt**2 * alpha[t-1]
#     x[t, 1] = x[t-1, 1] + dt * alpha[t-1]

# In matrix notation:
# x[t] = np.dot(A, x[t-1].T) + np.dot(B, alpha[t-1])
# where alpha[t-1] is the ("known") control input
    

# Generate the data
for t in range(1, nsteps+1):
    # x[t] = A x[t-1] + B alpha[t-1] + process noise
    x[t] = mvnorm.rvs(mean=np.dot(A, x[t-1].T) + np.dot(B, true_control(dt*(t-1), amplitude, phi=0)),
                      cov=covar, size=1)
    # y[t] is a noisy observation of x[t, 1], i.e. the velocity
    y[t] = stats.norm.rvs(loc=x[t, 1], scale=sigma_y, size=1)

# print(f"A: \n{A}")
# print(f"B: \n{B}")


plt.plot(time, alpha, 'k--', linewidth=6, alpha=0.25, label='alpha')
plt.plot(time, x[:, 0], linewidth=6, label='pos')
plt.plot(time, x[:, 1], linewidth=6, label='vel')
plt.plot(time, y, '.', markersize=20, color='black', alpha=0.6, label='obs')
sns.despine(offset=10, trim=True)
plt.legend(loc='best')
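
The loop above simulates a discrete-time kinematic (position/velocity) model driven by a known sinusoidal acceleration. With state $x_t = (\text{pos}_t, \text{vel}_t)$ and control input $\alpha_t$, the update used in the loop is

$$
x_t = A\,x_{t-1} + B\,\alpha_{t-1} + w_t,
\qquad
A = \begin{bmatrix} 1 & \Delta t \\ 0 & 1 \end{bmatrix},
\qquad
B = \begin{bmatrix} \tfrac{1}{2}\Delta t^2 \\ \Delta t \end{bmatrix},
$$

with process noise $w_t \sim \mathcal{N}(0, \Sigma)$, $\Sigma = \mathrm{diag}(0.2,\, 0.1)$, and only the velocity component of the state observed, $y_t \sim \mathcal{N}(\text{vel}_t, \sigma_y^2)$ with $\sigma_y = 1.7$.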

In [ ]:
# Better prior for SD? Inverse Gamma seems to work better than Half-Cauchy.

sd_prior = scipy.stats.invgamma.rvs(a=6, loc=0.4, scale=3, size=1000)
sns.distplot(sd_prior, kde=True);

# fig, ax = plt.subplots(1, 1)
# x = np.linspace(scipy.stats.halfcauchy.ppf(0.01), scipy.stats.halfcauchy.ppf(0.99), 100)
# ax.plot(x, scipy.stats.halfcauchy.pdf(x), 'r-', lw=5, alpha=0.6, label='halfcauchy pdf')
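
For comparison, a quick overlay of the two candidate priors. This is only a sketch: the inverse-gamma parameters match the cell above, while the half-Cauchy scale of 1 (and the truncation at 20 for display) is an arbitrary choice for illustration.

In [ ]:
# Overlay draws from the inverse gamma used above and a half-Cauchy (scale assumed to be 1)
halfcauchy_prior = scipy.stats.halfcauchy.rvs(scale=1, size=1000)
sns.distplot(sd_prior, kde=True, label='InvGamma(a=6, loc=0.4, scale=3)')
sns.distplot(halfcauchy_prior[halfcauchy_prior < 20], kde=True,
             label='HalfCauchy(1), truncated at 20 for display')
plt.legend(loc='best');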

In [ ]:
# Prior on the probability that the control acts in the positive direction
d_prior = scipy.stats.beta.rvs(a=9, b=3, size=1000)
sns.distplot(d_prior, kde=False);

Generative model


In [ ]:
nsteps = len(y)

from theano.compile.ops import as_op

# @as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dscalar])
def control(t, direction, amplitude, phi=0): 
    return(direction*amplitude * tt.sin(2 * np.pi * 0.5 * t + phi))
    
basic_model=pm.Model()

with basic_model:
    pos = np.empty(nsteps, dtype='object')
    vel = np.empty(nsteps, dtype='object')
    
    pos[0]=pm.Normal('pos0', mu=0, sd=2)
    vel[0]=pm.Normal('vel0', mu=0, sd=2)
    
    # D is a 0/1 indicator with prior probability p_dir of the control acting in
    # the positive direction; map it to a ±1 multiplier for the control input.
    p_dir = pm.Beta("p_dir", alpha=9, beta=3)
    direction = 2 * pm.Bernoulli("D", p=p_dir) - 1
    
    sd_obs = pm.InverseGamma("sd_obs", alpha=1, beta=2)
    sd_pos = pm.InverseGamma("sd_pos", alpha=6, beta=3)
    sd_vel = pm.InverseGamma("sd_vel", alpha=6, beta=3)

    
    amplitude = 20
    
    for t in range(1, nsteps):
        # Evaluate the control at the elapsed time dt*(t-1), matching the generative process above
        u = control(dt*(t-1), direction, amplitude, 0.0)

        pos[t] = pm.Normal('pos'+str(t), mu=pos[t-1] + dt*vel[t-1] + 0.5*dt**2 * u, sd=sd_pos)
        vel[t] = pm.Normal('vel'+str(t), mu=vel[t-1] + dt*u, sd=sd_vel)

        # Only the velocity is observed
        y_obs = pm.Normal("y_obs"+str(t), mu=vel[t], observed=y[t], sd=sd_obs)

NUTS sampling


In [ ]:
with timeit():
    with basic_model:
        trace = pm.sample(2000, njobs = 4)

In [ ]:
xr=range(nsteps)

# Unpack all the trace for plotting
pos_all=[[atrace['pos'+str(t)] for atrace in trace[1000:]] for t in xr]
vel_all=[[atrace['vel'+str(t)] for atrace in trace[1000:]] for t in xr]
sd_obs_all=[atrace['sd_obs'] for atrace in trace[1000:]]
sd_pos_all=[atrace['sd_pos'] for atrace in trace[1000:]]
sd_vel_all=[atrace['sd_vel'] for atrace in trace[1000:]]

plt.figure( figsize=(15,5))
plt.subplot(1,3,1)
plt.violinplot(pos_all, positions=xr, widths=1.0)
plt.plot(xr, x[:,0], 'o-')

plt.subplot(1,3,2)
plt.violinplot(vel_all, positions=xr, widths=1.0)
plt.plot( x[:,1], 'o-')
plt.plot( y, 'ko')

plt.subplot(1,3,3)
plt.violinplot( [sd_obs_all,sd_pos_all, sd_vel_all] );
plt.xticks([1,2,3], ['Observation', 'Position', 'Velocity'],size=16 );

plt.savefig('inferred.pdf', bbox_inches='tight')

In [ ]:
pm.forestplot(trace, varnames = ['sd_vel', 'sd_pos', 'sd_obs'])
pm.plot_posterior(trace, varnames = ['sd_vel', 'sd_pos', 'sd_obs', 'D', 'p_dir'])
pm.traceplot(trace, varnames = ['sd_vel', 'sd_pos', 'sd_obs'])
pm.traceplot(trace, varnames = ['p_dir'])

In [ ]:
# Posterior samples of the direction indicator D (D=1 maps to direction +1)
trace[1000:]['D']

Variational inference


In [ ]:
# variational inference:
with timeit():
    with basic_model:
        v_params = pm.variational.advi(n=100000)
        means, sds, elbos = v_params
        samples = pm.sample_vp(v_params, draws=2000)
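
A quick convergence check on the ELBO (a sketch; it assumes `elbos`, the third element returned by `advi` above, holds the per-iteration ELBO trace):

In [ ]:
# The ELBO trace should flatten out if ADVI has converged
plt.plot(elbos)
plt.xlabel('iteration')
plt.ylabel('ELBO');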

In [ ]:
sns.distplot(samples['p_dir'], color='red')

In [ ]:
varnames = ['sd_obs', 'sd_pos', 'sd_vel']
[sns.distplot(samples[var]) for var in varnames]

In [ ]:
sns.distplot(samples['sd_vel'], color='blue', label="ADVI")
sns.distplot(trace['sd_vel'], color='red', label="NUTS")
plt.legend(loc='best');

In [ ]:
sns.distplot(samples['sd_obs'], color='blue', label="ADVI")
sns.distplot(trace['sd_obs'], color='red', label="NUTS")
plt.legend(loc='best');

In [ ]:
# varnames = means.keys()
# fig, axs = plt.subplots(ncols=len(varnames), figsize=(15, 5))
# for var, ax in zip(varnames, axs):
#     mu_arr = means[var]
#     sigma_arr = sds[var]
#     ax.set_title(var)
#     for i, (mu, sigma) in enumerate(zip(mu_arr.flatten(), sigma_arr.flatten())):
#         sd3 = (-4*sigma + mu, 4*sigma + mu)
#         x = np.linspace(sd3[0], sd3[1], 300)
#         y = stats.norm(mu, sigma).pdf(x)
#         ax.plot(x, y)
        
# fig.tight_layout()