In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import corner
import numdifftools as nd
import glob
import scipy.optimize as so
import scipy.linalg as sl
from PTMCMCSampler import PTMCMCSampler

%matplotlib inline

In [ ]:
class GaussianLikelihood(object):
    
    def __init__(self, ndim=2, pmin=-10, pmax=10):
        
        self.a = np.ones(ndim)*pmin
        self.b = np.ones(ndim)*pmax
        
    def lnlikefn(self, x):
        return -0.5*np.sum(x**2)-len(x)*0.5*np.log(2*np.pi)
    
    def lnlikefn_grad(self, x):
        ll = -0.5*np.sum(x**2)-len(x)*0.5*np.log(2*np.pi)
        ll_grad = -x
        return ll, ll_grad
    
    def lnpriorfn(self, x):
        
        if np.all(self.a <= x) and np.all(self.b >= x):
            return 0.0
        else:
            return -np.inf
    
    def lnpriorfn_grad(self, x):
        return self.lnpriorfn(x), np.zeros_like(x)
    
    def lnpost_grad(self, x):
        ll, ll_grad = self.lnlikefn_grad(x)
        lp, lp_grad = self.lnpriorfn_grad(x)
        return ll+lp, ll_grad+lp_grad
    
    def lnpost(self, x):
        return self.lnpost_grad(x)[0]
    
    def hessian(self, x):
        return -np.eye(len(x))
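
A quick sanity check, added here as an illustrative cell (the names `glcheck`, `ll`, `grad` are throwaway): at the origin the log-likelihood should equal the Gaussian normalization, -ndim/2 * log(2*pi), and the analytic gradient should vanish.

In [ ]:
# Illustrative check of the untransformed likelihood
# (not required for the sampling workflow below)
glcheck = GaussianLikelihood(ndim=3)
ll, grad = glcheck.lnlikefn_grad(np.zeros(3))
print(np.isclose(ll, -1.5*np.log(2*np.pi)), np.allclose(grad, 0.0))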

In [ ]:
class intervalTransform(object):
    """
    Wrapper class of the likelihood for Hamiltonian samplers. This implements a
    coordinate transformation for all parameters from an interval to all real numbers.
    """
    def __init__(self, likob, pmin=None, pmax=None):
        """Initialize the intervalLikelihood with a ptaLikelihood object"""
        self.likob = likob
        
        if pmin is None:
            self.a = likob.a
        else:
            self.a = pmin * np.ones_like(likob.a)
        
        if pmax is None:
            self.b = likob.b
        else:
            self.b = pmax * np.ones_like(likob.b)

    def forward(self, x):
        """Forward transform the real coordinates (on the interval) to the
        transformed coordinates (on all real numbers)
        """
        p = np.atleast_2d(x.copy())
        # x == b maps to +inf and x == a to -inf under log((x-a)/(b-x))
        posinf, neginf = (self.b == x), (self.a == x)
        m = ~(posinf | neginf)
        p[:,m] = np.log((p[:,m] - self.a[m]) / (self.b[m] - p[:,m]))
        p[:,posinf] = np.inf
        p[:,neginf] = -np.inf
        return p.reshape(x.shape)

    def backward(self, p):
        """Backward transform the transformed coordinates (on all real numbers)
        to the real coordinates (on the interval)
        """
        x = np.atleast_2d(p.copy())
        # logistic map, written to avoid nan from exp overflow at large p
        x[:,:] = self.a[:] + (self.b[:] - self.a[:]) / (1 + np.exp(-x[:,:]))
        return x.reshape(p.shape)
    
    def logjacobian_grad(self, p):
        """Return the log of the Jacobian at point p"""
        lj = np.sum( np.log(self.b[:]-self.a[:]) + p[:] -
                2*np.log(1.0+np.exp(p[:])) )

        lj_grad = np.zeros_like(p)
        lj_grad[:] = (1 - np.exp(p[:])) / (1 + np.exp(p[:]))
        return lj, lj_grad

    def dxdp(self, p):
        """Derivative of x wrt p (jacobian for chain-rule) - diagonal"""
        pp = np.atleast_2d(p)
        d = np.ones_like(pp)
        d[:,:] = (self.b[:]-self.a[:])*np.exp(pp[:,:])/(1+np.exp(pp[:,:]))**2
        return d.reshape(p.shape)

    def d2xd2p(self, p):
        """Second derivative of x wrt p (for the chain rule) - diagonal"""
        pp = np.atleast_2d(p)
        d = np.zeros_like(pp)
        d[:,:] = (self.b[:]-self.a[:])*(np.exp(pp[:,:])-np.exp(2*pp[:,:]))/(1+np.exp(pp[:,:]))**3
        return d.reshape(p.shape)

    def logjac_hessian(self, p):
        """The Hessian of the log-jacobian"""
        # p should not be more than one-dimensional
        assert len(p.shape) == 1

        return np.diag(-2*np.exp(p) / (1+np.exp(p))**2)

    def lnlikefn_grad(self, p, **kwargs):
        """The log-likelihood in the new coordinates"""
        x = self.backward(p)
        ll, ll_grad = self.likob.lnlikefn_grad(x, **kwargs)
        lj, lj_grad = self.logjacobian_grad(p)
        return ll+lj, ll_grad*self.dxdp(p)+lj_grad
    
    def lnlikefn(self, p, **kwargs):
        return self.lnlikefn_grad(p, **kwargs)[0]

    def lnpriorfn_grad(self, p, **kwargs):
        """The log-prior in the new coordinates. Do not include the Jacobian"""
        x = self.backward(p)
        lp, lp_grad = self.likob.lnpriorfn_grad(x)
        return lp, lp_grad*self.dxdp(p)
    
    def lnpriorfn(self, p, **kwargs):
        return self.lnpriorfn_grad(p, **kwargs)[0]

    def logpostfn_grad(self, p, **kwargs):
        """The log-posterior in the new coordinates"""
        x = self.backward(p)
        lp, lp_grad = self.likob.lnpost_grad(x)
        lj, lj_grad = self.logjacobian_grad(p)
        return lp+lj, lp_grad*self.dxdp(p)+lj_grad


    def hessian(self, p):
        """The Hessian matrix in the new coordinates"""
        # p should not be more than one-dimensional
        assert len(p.shape) == 1

        # Get quantities from un-transformed distribution
        x = self.backward(p)
        orig_hessian = self.likob.hessian(x)
        _, orig_lp_grad = self.likob.lnpost_grad(x)

        # Transformation properties
        hessian = self.logjac_hessian(p)
        dxdpf = np.diag(self.dxdp(p))

        hessian += np.dot(dxdpf.T, np.dot(orig_hessian, dxdpf))
        # second-order chain-rule term: d2x/dp2 * dlnpost/dx
        hessian += np.diag(self.d2xd2p(p)*orig_lp_grad)

        return hessian
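
As a consistency check, added here with throwaway names (`tcheck`, `xtest`, `ptest`): the forward and backward maps should be inverses on the open interval, and `dxdp` should agree with a numerical derivative of `backward` computed with numdifftools.

In [ ]:
# Illustrative round-trip and derivative check for the interval transform
tcheck = intervalTransform(GaussianLikelihood(ndim=2, pmin=0.0, pmax=10.0))
xtest = np.array([0.5, 9.5])
ptest = tcheck.forward(xtest)
print(np.allclose(tcheck.backward(ptest), xtest))
print(np.allclose(np.diag(nd.Jacobian(tcheck.backward)(ptest)), tcheck.dxdp(ptest)))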

In [ ]:
ndim = 40
glo = GaussianLikelihood(ndim=ndim, pmin=0.0, pmax=10.0)
glt = intervalTransform(glo, pmin=0.0, pmax=10)
gl = glt

In [ ]:
# Demonstrate that the gradients are accurate
p0 = np.ones(ndim)*0.01
ndjac = nd.Jacobian(gl.lnlikefn)
ndhes = nd.Hessian(gl.lnlikefn)
ndhesd = nd.Hessdiag(gl.lnlikefn)

print(p0[:4])
print(gl.lnlikefn_grad(p0)[1][:4])
print(ndjac(p0)[:4])
print(np.diag(gl.hessian(p0))[:4])
print(ndhesd(p0)[:4])

In [ ]:
# Maximize using scipy
result = so.minimize(lambda x: -gl.lnlikefn(x), p0, jac=lambda x: -gl.lnlikefn_grad(x)[1],
                     method='Newton-CG', hess=lambda x: -gl.hessian(x), options={'disp':True})
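
To inspect where the optimizer ended up, the maximum can be mapped back to the original coordinates (an added diagnostic cell). Note that this is the mode of the transformed posterior, which includes the Jacobian term, so it need not coincide with the untransformed mode at the boundary.

In [ ]:
# Optimum in transformed and in original coordinates
print(result['x'][:4])
print(glt.backward(result['x'])[:4])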

In [ ]:
# Set the start position and the covariance
p0 = result['x']
h0 = gl.hessian(p0)
cov = sl.cho_solve(sl.cho_factor(-h0), np.eye(len(h0)))
print(np.diag(cov)[:10])
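
Since cov came from a Cholesky solve of the negative Hessian, a cheap consistency check (added here) is that -h0 times cov recovers the identity.

In [ ]:
# Verify that cov is the inverse of the negative Hessian
print(np.allclose(np.dot(-h0, cov), np.eye(ndim)))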

In [ ]:
sampler = PTMCMCSampler.PTSampler(ndim, gl.lnlikefn, gl.lnpriorfn, np.copy(cov),
                                  logl_grad=gl.lnlikefn_grad, logp_grad=gl.lnpriorfn_grad,
                                  outDir='./chains')

In [ ]:
sampler.sample(p0, 60000, burn=500, thin=1, covUpdate=500,
               SCAMweight=10, AMweight=10, DEweight=10, NUTSweight=10, HMCweight=10, MALAweight=0,
               HMCsteps=100, HMCstepsize=0.4)

In [ ]:
jumpfiles = glob.glob('chains/*jump.txt')
jumps = [np.loadtxt(f) for f in jumpfiles]
for ct, j in enumerate(jumps):
    plt.plot(j, label=jumpfiles[ct].split('/')[-1].split('_jump.txt')[0])
plt.legend(loc='best', frameon=False)
plt.ylabel('Acceptance Rate')
plt.ylim(0.0, 1.1)

In [ ]:
# PTMCMCSampler chain files store the parameter columns first; the last
# four columns are sampler diagnostics (log-posterior, log-likelihood,
# acceptance rate, PT swap acceptance rate)
data = np.loadtxt('chains/chain_1.txt')
chaint = data[:,:-4]

In [ ]:
plt.plot(data[:,-4])  # trace of the log-posterior

In [ ]:
# Map the chain back to the original (interval) coordinates
chain = glt.backward(chaint)

In [ ]:
corner.corner(chaint[:,:3], bins=50);  # first three parameters, transformed coordinates

In [ ]:
corner.corner(chain[:,:3], bins=50);  # first three parameters, original coordinates
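
Since the likelihood is a standard normal and the prior truncates it at zero (the upper bound of 10 is irrelevant here), each marginal posterior is essentially half-normal, with mean sqrt(2/pi) ≈ 0.80 and standard deviation sqrt(1 - 2/pi) ≈ 0.60. Comparing the chain moments to these values is a quick sanity check, added here; the burn-in of 1000 samples is an arbitrary choice.

In [ ]:
# Compare sample moments to the analytic half-normal values
burn = 1000
print(chain[burn:].mean(axis=0)[:4], np.sqrt(2/np.pi))
print(chain[burn:].std(axis=0)[:4], np.sqrt(1 - 2/np.pi))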

In [ ]: