In [1]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 8.0)
In [2]:
# First, we need to know what's in the data file.
!head -15 R11ceph.dat
In [3]:
class Cepheids(object):

    def __init__(self, filename):
        # Read in the data and store it in this master array.
        # Columns (0-based): column 1 is the host galaxy ID, columns 2 and 3
        # are the observed magnitude and its error, column 4 is the period in days.
        self.data = np.loadtxt(filename)
        self.hosts = self.data[:,1].astype('int').astype('str')
        # We'll need the plotting setup to be the same each time we make a plot:
        colornames = ['red','orange','yellow','green','cyan','blue','violet','magenta','gray']
        self.colors = dict(zip(self.list_hosts(), colornames))
        self.xlimits = np.array([0.3,2.3])
        self.ylimits = np.array([30.0,17.0])  # inverted: brighter (smaller mag) at the top
        return

    def list_hosts(self):
        # The list of (9) unique galaxy host names:
        return np.unique(self.hosts)

    def select(self, ID):
        # Pull out one galaxy's data from the master array:
        index = (self.hosts == str(ID))
        self.mobs = self.data[index,2]
        self.merr = self.data[index,3]
        self.logP = np.log10(self.data[index,4])
        return

    def plot(self, X):
        # Plot all the points in the dataset for host galaxy X.
        ID = str(X)
        self.select(ID)
        plt.rc('xtick', labelsize=16)
        plt.rc('ytick', labelsize=16)
        plt.errorbar(self.logP, self.mobs, yerr=self.merr,
                     fmt='.', ms=7, lw=1, color=self.colors[ID], label='NGC'+ID)
        plt.xlabel('$\\log_{10} P / {\\rm days}$', fontsize=20)
        plt.ylabel('${\\rm magnitude\\;(AB)}$', fontsize=20)
        plt.xlim(self.xlimits)
        plt.ylim(self.ylimits)
        plt.title('Cepheid Period-Luminosity (Riess et al 2011)', fontsize=20)
        return

    def overlay_straight_line_with(self, a=0.0, b=24.0):
        # Overlay a straight line with slope a and intercept b:
        x = self.xlimits
        y = a*x + b
        plt.plot(x, y, 'k-', alpha=0.5, lw=2)
        plt.xlim(self.xlimits)
        plt.ylim(self.ylimits)
        return

    def add_legend(self):
        plt.legend(loc='upper left')
        return
In [4]:
data = Cepheids('R11ceph.dat')
print(data.colors)
OK, now we are all set up! Let's plot one of the datasets.
In [5]:
data.plot(4258)
# for ID in data.list_hosts():
#     data.plot(ID)
data.overlay_straight_line_with(a=-2.0,b=24.0)
data.add_legend()
Our model is a straight line in the (log period, magnitude) plane:

$m = a\;\log_{10} P + b$

Each Cepheid provides a noisy measurement of the magnitude at its measured period; for example, one of the points above has

$m^{\rm obs} = 24.51 \pm 0.31$ at $\log_{10} P = \log_{10} (13.0/{\rm days})$
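Assuming independent Gaussian measurement errors (the natural reading of the error bars above), each data point $k$ contributes a log-likelihood term

$\ln \Pr(m^{\rm obs}_k \mid a,b) = -\frac{1}{2}\,\frac{\left(m^{\rm obs}_k - a\,\log_{10} P_k - b\right)^2}{\sigma_k^2} + {\rm const}$

and the log-likelihood for a whole galaxy's dataset is the sum of these terms over its Cepheids.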
We should now be able to code up functions for the log likelihood, log prior and log posterior, such that we can evaluate them on a 2D parameter grid. Let's fill them in:
In [6]:
def log_likelihood(logP,mobs,merr,a,b):
    # Assuming independent Gaussian errors (as above): sum the per-point
    # terms, dropping constants since we only need relative probabilities.
    return np.sum(-0.5*(mobs - a*logP - b)**2/merr**2)

def log_prior(a,b):
    # Uniform (flat) prior over the parameter ranges we grid over below.
    return 0.0

def log_posterior(logP,mobs,merr,a,b):
    return log_likelihood(logP,mobs,merr,a,b) + log_prior(a,b)
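As a quick sanity check (a sketch, using the trial values $a=-2.0$, $b=24.0$ that we overlaid above), we should get back a single finite number:

data.select(4258)
print(log_posterior(data.logP, data.mobs, data.merr, -2.0, 24.0))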
Now, let's set up a suitable parameter grid and compute the posterior PDF!
In [7]:
# Select a Cepheid dataset:
data.select(4258)
# Set up parameter grids:
npix = 100
amin,amax = -4.0,-2.0
bmin,bmax = 25.0,27.0
agrid = np.linspace(amin,amax,npix)
bgrid = np.linspace(bmin,bmax,npix)
logprob = np.zeros([npix,npix])
# Loop over parameters, computing the unnormalized log posterior PDF.
# Note the index order: logprob[j,i] puts b along rows and a along
# columns, matching the extent used in the plots below.
for i,a in enumerate(agrid):
    for j,b in enumerate(bgrid):
        logprob[j,i] = log_posterior(data.logP,data.mobs,data.merr,a,b)
# Exponentiate (subtracting the peak log probability, for numerical
# stability) and normalize to get the posterior PDF on the grid:
Z = np.max(logprob)
prob = np.exp(logprob - Z)
norm = np.sum(prob)
prob /= norm
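The double loop is fine at this grid size; for larger grids, an equivalent broadcast computation (a sketch, assuming the Gaussian log-likelihood and flat prior defined above) fills the same logprob array without Python-level loops:

# Vectorized alternative: broadcast (b, a, data point) onto a 3D array.
A = agrid[np.newaxis, :, np.newaxis]                # a varies along columns (i)
B = bgrid[:, np.newaxis, np.newaxis]                # b varies along rows (j)
resid = (data.mobs - A*data.logP - B) / data.merr   # shape (npix, npix, Ndata)
logprob = -0.5*np.sum(resid**2, axis=-1)            # logprob[j,i], as before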
Now, plot the posterior, with 68% and 95% credible contours:
In [8]:
# Sort the pixel values and compute the cumulative distribution
# (renamed to avoid shadowing the builtin `sorted`):
sorted_prob = np.sort(prob.flatten())
C = sorted_prob.cumsum()
# Find the pixel values that lie at the levels that contain
# 68% and 95% of the probability:
lvl68 = np.min(sorted_prob[C > (1.0 - 0.68)])
lvl95 = np.min(sorted_prob[C > (1.0 - 0.95)])
plt.imshow(prob, origin='lower', cmap='Blues', interpolation='none', extent=[amin,amax,bmin,bmax])
plt.contour(prob,[lvl68,lvl95],colors='black',extent=[amin,amax,bmin,bmax])
plt.grid()
plt.xlabel('slope a')
plt.ylabel('intercept b / AB magnitudes')
Out[8]: (figure: 2D posterior PDF over slope a and intercept b, with 68% and 95% credible contours)
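As a quick check, the probability enclosed by each contour level should come out close to the target fraction:

print(prob[prob >= lvl68].sum())   # should be close to 0.68
print(prob[prob >= lvl95].sum())   # should be close to 0.95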
Reading rough best-fit values off the contour plot, we can overlay the corresponding line on the data:
In [9]:
data.plot(4258)
data.overlay_straight_line_with(a=-3.0,b=26.3)
data.add_legend()
To summarize each parameter separately, we marginalize: sum the 2D posterior over the other parameter.
In [10]:
# Marginalize by summing over the grid: axis 0 runs over b (rows),
# so summing it out leaves Pr(a|d); axis 1 runs over a, leaving Pr(b|d).
prob_a_given_data = np.sum(prob, axis=0)
prob_b_given_data = np.sum(prob, axis=1)
In [11]:
# Check the shape and normalization (the marginal should sum to 1):
print(prob_a_given_data.shape, np.sum(prob_a_given_data))
In [12]:
# Plot 1D distributions:
fig,ax = plt.subplots(nrows=1, ncols=2)
fig.set_size_inches(15, 6)
plt.subplots_adjust(wspace=0.2)
left = ax[0].plot(agrid, prob_a_given_data)
ax[0].set_title('${\\rm Pr}(a|d)$')
ax[0].set_xlabel('slope $a$')
ax[0].set_ylabel('Posterior probability density')
right = ax[1].plot(bgrid, prob_b_given_data)
ax[1].set_title('${\\rm Pr}(b|d)$')
ax[1].set_xlabel('intercept $b$ / AB magnitudes')
ax[1].set_ylabel('Posterior probability density')
Out[12]: (figure: 1D marginalized posterior PDFs for the slope a and intercept b)
In [13]:
# Compress each PDF into a median and 68% credible interval, and report:
def compress_1D_pdf(x, pr, ci=68, dp=1):

    # Interpret credible interval request:
    low = (1.0 - ci/100.0)/2.0    # 0.16 for ci=68
    high = 1.0 - low              # 0.84 for ci=68

    # Find the cumulative distribution and compute percentiles
    # (accurate only to the grid spacing in x):
    cumulant = pr.cumsum()
    pctlow = x[cumulant > low].min()
    median = x[cumulant > 0.50].min()
    pcthigh = x[cumulant > high].min()

    # Convert to error bars, and format a string:
    errplus = np.abs(pcthigh - median)
    errminus = np.abs(median - pctlow)
    report = "$ "+str(round(median,dp))+"^{+"+str(round(errplus,dp))+"}_{-"+str(round(errminus,dp))+"} $"

    return report
In [14]:
print("a = ",compress_1D_pdf(agrid,prob_a_given_data,ci=68,dp=2))
print("b = ",compress_1D_pdf(bgrid,prob_b_given_data,ci=68,dp=2))