In [1]:
import numpy as np, scipy.linalg as sl
import scipy.special as ss, math
import matplotlib.pyplot as plt
import piccard as pic
import os, glob
%matplotlib inline
Piccard's basic data format is HDF5. It interfaces with tempo2 through libstempo, but because only HDF5 is a hard requirement (h5py is installable through pip), tempo2 itself is not a mandatory dependency. This makes installation easier on clusters.
So, before we can do anything, we need to create HDF5 files from tempo2 par/tim files. The HDF5 files (built as a mini file-system) have a separate container for each pulsar, so many pulsars can be present in a single HDF5 file. The HDF5 file also stores the full par and tim files inside its own data format, so we never need the par/tim files again; they can always be extracted from the HDF5 file.
We add pulsars as follows:
In [ ]:
t2df = pic.DataFile('J0030.h5') # Create the HDF5 file (does not need to exist)
t2df.addTempoPulsar('./partim/J0030+0451.mdc1-open1.par', './partim/J0030+0451.mdc1-open1.tim') # Add/replace the pulsar
# After this, we can add another pulsar to the HDF5 file by calling addTempoPulsar again
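Since the result is plain HDF5, we can peek at the file with h5py directly. A minimal sketch; the exact group layout depends on the Piccard version, so just treat the printed names as informational:
In [ ]:
# List every group/dataset in the file; each pulsar gets its own container,
# and the original par/tim contents live inside it
import h5py
f = h5py.File('J0030.h5', 'r')
def shownode(name):
    print name
f.visit(shownode)
f.close()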
The main object of Piccard is the likelihood object. Initializing one is a two-step process: first we initialize the tempo2 model, and then we initialize the likelihood model. The tempo2 information is stored in the HDF5 file. The likelihood model can be stored in JSON files (though this is not necessary for operation). Once the likelihood object is initialized, we can start our analysis with MCMCs and the like.
In [6]:
# Basic likelihood object initialization
likob = pic.ptaLikelihood('J0030.h5')
In [7]:
# Initializing the likelihood model is done through a Python dictionary. Create them like this:
modeldict = likob.makeModelDict(nfreqs=20, incRedNoise=True, noiseModel='powerlaw',
                                varyEfac=True, separateEfacs=False,
                                incEquad=False, separateEquads=False,
                                ndmfreqs=0, incDM=False, dmModel='dmspectrum',
                                incCEquad=False, separateCEquads=False,
                                incBWM=False, incGWB=False, gwbModel='powerlaw',
                                likfunc='mark3')
likob.initModel(modeldict, fromFile=False, verbose=False)
In [4]:
# We can write the model dictionary to a JSON file (human readable) as follows
likob.writeModelToFile('J0030.json')
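Because the model file is plain JSON, we can also inspect or tweak it outside Piccard with the standard json module; a quick sketch (print the result to see the actual key names):
In [ ]:
# Load the model file as an ordinary Python object and look inside
import json
with open('J0030.json') as fh:
    model = json.load(fh)
print model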
In [2]:
# Given a JSON and an HDF5 file, we could have also initialized the likelihood object like this:
likob = pic.ptaLikelihood('J0030.h5', 'J0030.json', noGmat=False)
In [6]:
psr = likob.ptapsrs[0] # First pulsar in the array
mjd = psr.toas/pic.pic_spd + pic.pic_T0 # Piccard stores TOAs in seconds, not MJDs; convert them here
res = 1e6*psr.residuals # Timing residuals in microseconds
err = 1e6*psr.toaerrs # TOA uncertainties in microseconds
plt.errorbar(mjd, res, yerr=err, fmt='.', c='blue')
plt.title('Residuals of ' + psr.name)
plt.xlabel('MJD')
plt.ylabel(r'Residual [$\mu$s]')
Out[6]: [figure: timing residuals of J0030+0451 in microseconds vs. MJD]
In [7]:
# The design matrix and related quantities are also available as attributes of psr
print psr.Mmat.shape, psr.Gmat.shape, psr.Fmat.shape
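As a quick illustration of what these matrices are for, the G-matrix projects the timing model out of the residuals. This is the standard construction, shown here as a sketch rather than a Piccard call:
In [ ]:
# Timing-model-marginalized residuals: G^T r has no timing model contribution
Gres = np.dot(psr.Gmat.T, psr.residuals)
print psr.residuals.shape, '->', Gres.shape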
In [8]:
# We have access to all the model parameters through likob.pardes
for pd in likob.pardes:
    print pd
You can see above that we initialized the likelihood function as 'mark3'. There are various likelihood functions present in Piccard, each with its own purpose. Not a whole lot of checking is done, so be careful which one you select for which job.
When beginning to work with Piccard, stick to 'mark3', 'mark6', and perhaps 'gibbs'.
In [9]:
# Just for reference, the log-likelihood can be called with
print likob.loglikelihood(likob.pstart)
print likob.logposterior(likob.pstart)
print likob.logprior(likob.pstart)
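As a sanity check, the log-posterior should equal the log-likelihood plus the log-prior (up to any internal normalization conventions); a quick sketch:
In [ ]:
# This difference should be (close to) zero at any parameter point
p = likob.pstart
print likob.logposterior(p) - likob.loglikelihood(p) - likob.logprior(p)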
In [10]:
# Run a simple MCMC, using the PAL PTMCMC sampler.
if not os.path.isdir('./chains'):
    os.mkdir('./chains')
pic.RunPTMCMC(likob, 10000, './chains/') # Run a chain for 10k steps
In [11]:
# In the file './chains/ptparameters.txt', we now have a description of all the parameters in the MCMC file.
# This is used by ReadMCMCFile.
# lp: log-posterior values
# ll: log-likelihood values
# chain: All the model parameters
# labels: The names of all the model parameters
(lp, ll, chain, labels) = pic.ReadMCMCFile('./chains/')
In [12]:
# We see we need to burn very few samples (<100)
plt.plot(lp)
plt.xlabel('Sample #')
plt.ylabel('log-posterior')
Out[12]: [figure: log-posterior trace vs. sample number]
In [13]:
# Burn 100 steps, and make a triangle plot for all parameters
pic.triplot(chain[100:], labels)
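To accompany the triangle plot, here is a quick numeric posterior summary computed straight from the chain with plain numpy:
In [ ]:
# Posterior mean and standard deviation per parameter, after burn-in
burn = 100
for lab, mu, sig in zip(labels, chain[burn:].mean(axis=0), chain[burn:].std(axis=0)):
    print lab, mu, '+/-', sig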
In [14]:
# makeAllPlots makes a whole bunch of useful plots, and saves them as png files in the output directory
pic.makeAllPlots(chainfile='./chains/', outputdir='./chains/', burnin=100, skipTMP=False, triplot_hm=False)
In [38]:
# Now the same thing with a Gibbs sampler, this time using a spectrum model for the red noise
modeldict = likob.makeModelDict(nfreqs=20, incRedNoise=True, noiseModel='spectrum',
                                varyEfac=True, separateEfacs=False,
                                incEquad=False, separateEquads=False,
                                ndmfreqs=0, incDM=False, dmModel='dmspectrum',
                                incCEquad=False, separateCEquads=False,
                                incBWM=False, incGWB=False, gwbModel='powerlaw',
                                likfunc='gibbs')
likob.initModel(modeldict, fromFile=False, verbose=False)
# Don't run PTMCMC, but Gibbs_mark1 (first Gibbs sampler of van Haasteren & Vallisneri 2014)
pic.RunGibbs_mark1(likob, 2000, './chains/')
In [42]:
# Read the chain. We have many more parameters now (69)
(lp, ll, chain, labels) = pic.ReadMCMCFile('./chains/')
print chain.shape
In [43]:
# Burn the first 100 steps, and plot only the first 15 parameters
pic.triplot(chain[100:,:15], labels[:15])
In [47]:
# Make a power-spectrum plot (spectrum starts at index '1', since '0' is the efac)
psr = likob.ptapsrs[0]
freqs = psr.Ffreqs
pic.makespectrumplot(chain[100:], parstart=1, numfreqs=20, freqs=freqs)
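The same information can be pulled from the chain by hand. A sketch, assuming (as the makespectrumplot call above implies) that chain columns 1 through 20 hold the spectrum parameters, and that Ffreqs lists each frequency twice, once for the sine and once for the cosine mode:
In [ ]:
# Posterior median and 16/84 percentiles of the 20 spectrum parameters
spec = chain[100:, 1:21]
fr = freqs[::2]                     # assumed sine/cosine pairing in Ffreqs
med = np.median(spec, axis=0)
lo, hi = np.percentile(spec, [16, 84], axis=0)
plt.errorbar(np.log10(fr), med, yerr=[med-lo, hi-med], fmt='.')
plt.xlabel(r'$\log_{10}$ frequency [Hz]')
plt.ylabel('spectrum parameter')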
In [48]:
# All the plots are automatically made, again, with:
pic.makeAllPlots(chainfile='./chains/', outputdir='./chains/', burnin=100, skipTMP=False, triplot_hm=False)
In [3]:
# Doing a NANOGrav 5yr pulsar. Create the HDF5 file, and set the model:
t2df = pic.DataFile('B1855.h5')
t2df.addTempoPulsar('partim/B1855+09_NANOGrav_dfg+12.par', 'partim/B1855+09_NANOGrav_dfg+12.tim')
likob = pic.ptaLikelihood('B1855.h5')
# Include DM variations and jitter (use mark4), and separate all white-noise components per backend
modeldict = likob.makeModelDict(nfreqs=20, incRedNoise=True, noiseModel='powerlaw',
                                varyEfac=True, separateEfacs=True,
                                incEquad=True, separateEquads=True,
                                ndmfreqs=20, incDM=True, dmModel='dmpowerlaw',
                                incCEquad=True, separateCEquads=True,
                                incBWM=False, incGWB=False, gwbModel='powerlaw',
                                likfunc='mark4')
likob.initModel(modeldict, fromFile=False, verbose=False)
In [4]:
# Now we have more dimensions:
print "Dimensions:", likob.dimensions
for pd in likob.pardes:
    print pd
In [5]:
# Only 2.5ms per likelihood call
%timeit likob.loglikelihood(likob.pstart)
In [6]:
# Maximize the likelihood with Particle Swarm:
pic.RunPSO(likob, './chains')
# Maximum is saved in the file 'pso.txt'
mlpars = np.loadtxt('./chains/pso.txt')
print "Maximum parameters", mlpars[1:]
print "Max posterior:", mlpars[0]
# 1.15153745 -6.28756502 -6.27438197 -17.52630549 3.69660418 -9.94143639 0.55095782
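It is worth double-checking the PSO result by evaluating the posterior there ourselves (the slicing assumes, as above, that the first column of pso.txt is the posterior value and the rest are the parameters):
In [ ]:
# This should reproduce the 'Max posterior' value printed above
print likob.logposterior(mlpars[1:])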
In [7]:
# Running the MCMC chain works as before (need more steps in 7 dimensions):
pic.RunPTMCMC(likob, 50000, './chains/')
In [8]:
# We now need a larger burn-in time. About 5000 seems ok
(lp, ll, chain, labels) = pic.ReadMCMCFile('./chains/')
plt.plot(lp)
Out[8]: [figure: log-posterior trace vs. sample number]
In [9]:
pic.triplot(chain[5000:], labels)
In [10]:
# We can also compute the autocorrelation function
Nlags = 1000 # Number of lags to examine
Npars = 7 # Number of parameters to plot
burnin = 5000
tlag = np.arange(1, Nlags+1)
acf = np.zeros((Nlags, Npars))
for ii in range(Npars):
    acf[:,ii] = pic.getacf(chain[burnin:,ii], tlag)
In [11]:
# We see that the autocorrelation time is about 100
for ii in range(Npars):
    plt.plot(tlag, acf[:,ii])
plt.legend(labels[:Npars])
Out[11]: [figure: autocorrelation functions of the first 7 parameters]
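From the ACF we can also estimate an effective sample size per parameter. A back-of-the-envelope sketch using the usual formula ESS ~ N / (1 + 2*sum(acf)), truncated at Nlags:
In [ ]:
# Integrated autocorrelation time and effective sample size per parameter
N = chain.shape[0] - burnin
for ii in range(Npars):
    tau = 1.0 + 2.0*np.sum(acf[:,ii])   # rough estimate, truncated at Nlags
    print labels[ii], 'tau ~', tau, ' ESS ~', N/tau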
Say we want to do inference on the timing model parameters. There are two ways to go about this: (1) include the timing model parameters numerically in the MCMC chain, or (2) have the sampler treat them analytically, drawing them from their conditional distribution.
The Gibbs sampler does number (2) by design. However, for reasons of numerical precision, those draws cannot really be trusted at the moment. The design matrix has a very large dynamic range, so a parameter transformation is made to orthonormal parameters that span the basis of the timing model parameters. This transformation is invertible, but when transforming back to the timing model parameter basis (with its huge dynamic range), some information is lost. So at this moment, we should not use the timing model parameter output of the Gibbs sampler for science (even though the runs themselves are fine). This issue is under investigation.
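To make the dynamic-range problem concrete: the orthonormalization amounts to replacing the design matrix M by the orthonormal factor U of its singular value decomposition. This is a sketch of the general technique (reusing the psr object from earlier), not Piccard's exact code:
In [ ]:
# The design matrix is terribly conditioned; its left singular vectors U span
# the same column space, but are orthonormal
U, s, Vt = sl.svd(psr.Mmat, full_matrices=False)
print 'condition number of M:', s.max()/s.min()
print 'U orthonormal:', np.allclose(np.dot(U.T, U), np.eye(U.shape[1]))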
So basically, we need to include the timing model numerically in our MCMC chain (so mark3, mark4, or mark6) in order to do inference on these parameters. We do that as follows:
In [2]:
likob = pic.ptaLikelihood('B1855.h5')
# Include jitter (use mark4) and the timing model, keeping three JUMPs as explicit
# (numerically sampled) parameters; separate all white-noise components per backend
modeldict = likob.makeModelDict(nfreqs=20, incRedNoise=True, noiseModel='powerlaw',
                                varyEfac=True, separateEfacs=True,
                                incEquad=True, separateEquads=True,
                                ndmfreqs=20, incDM=False, dmModel='dmpowerlaw',
                                incCEquad=True, separateCEquads=True,
                                incTimingModel=True, nonLinear=False, compression='timingmodel',
                                keepTimingModelPars=['JUMP1', 'JUMP2', 'JUMP3'],
                                likfunc='mark4')
likob.initModel(modeldict, fromFile=False, verbose=False)
In [3]:
# Run the chain as before
pic.RunPTMCMC(likob, 50000, './chains/')
In [4]:
pic.makeAllPlots('./chains/', './chains/', burnin=10000)
In [10]:
# Run nested sampling with PolyChord, and also with MultiNest for comparison.
if os.path.isdir('./pc'):
    import shutil
    shutil.rmtree('./pc')
os.mkdir('./pc')
pic.RunPolyChord(likob, './pc/pc-', n_live_points=500, n_chords=1)
pic.RunMultiNest(likob, './chains/mn-')
In [17]:
""" Powerlaw
Nested Sampling ln(Z): 1760.102986
Acceptance Rate: 0.349502
Replacements: 6463
Total Samples: 18492
Nested Sampling ln(Z): 1760.109836
ln(ev)= 1760.4052460735684 +/- 0.13687495892900020
Total Likelihood Evaluations: 18492
Sampling finished. Exiting MultiNest
________________________________________
| |
| ndead = 9060 |
| log(Z) = 1760.14516 +/- 0.13952 |
| check = 1768.29993 +/- 0.13952 |
|________________________________________|
Spectrum:
Acceptance Rate: 0.126976
Replacements: 17174
Total Samples: 135254
Nested Sampling ln(Z): 1742.745082
ln(ev)= 1742.8239108064331 +/- 0.23108293278665604
Total Likelihood Evaluations: 135254
Sampling finished. Exiting MultiNest
________________________________________
| |
| ndead = 20994 |
| log(Z) = 1740.43472 +/- 0.23558 |
| check = 1792.30463 +/- 0.23558 |
|________________________________________|
"""
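Given the evidences quoted above, the ln Bayes factor between the two red-noise models follows directly; a quick calculation with the PolyChord log(Z) values from the cell above:
In [ ]:
# ln Bayes factor: power-law versus spectrum red-noise model
lnZ_pl, lnZ_sp = 1760.14516, 1740.43472
print 'ln(B) =', lnZ_pl - lnZ_sp    # ~19.7, a decisive preference for the power law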
In [21]:
datpc = np.loadtxt('pc/pc-.txt')
datmn = np.loadtxt('chains/mn-.txt')
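Both samplers write MultiNest-style posterior files, which explains the slicing in the corner-plot calls below: column 0 is the sample weight, column 1 is -2*ln(likelihood), and the remaining columns are the model parameters.
In [ ]:
# Columns: [weight, -2*lnL, param_1, ..., param_n]
print datpc.shape, datmn.shape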
In [22]:
import triangle
In [24]:
triangle.corner(datpc[:,2:], weights=datpc[:,0])
Out[24]: [figure: triangle plot of the PolyChord samples]
In [31]:
triangle.corner(datmn[2000:,2:], weights=datmn[2000:,0])
Out[31]: [figure: triangle plot of the MultiNest samples]