In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import emcee
In a spectrum-fitting routine, given an observed spectrum and a computational model that simulates the spectrum, we are interested in determining the parameters (with their errors) that give the spectrum most similar to the observed one.
If we know the "true" underlying model of the spectrum (i.e. we can simulate the spectrum exactly, given a set of parameters), obtaining the errors of the parameters is just like a linear fit. However, if we only have an approximate model, the errors can be severely underestimated.
In this notebook, I will show the problem of fitting with an approximate model and how to solve it (hopefully).
We consider a simplified model of generating a spectrum. Let's say we have 4 parameters which determine the values at $x = (0, 0.3, 0.7, 1.0)$; all the other values are obtained using cubic spline interpolation. We call this the "cubic" model, which is the true model. Suppose that in an experiment we can observe 1500 data points linearly spaced in $[0, 1]$, with some noise.
In [2]:
np.random.seed(123)
ndata = 1500
xparameters = [0, 0.3, 0.7, 1.0]   # positions of the model's knots
parameters = [0.1, 0.7, 0.2, 0.4]  # true parameter values at the knots
sigma_noise = 0.05                 # noise level of the observation

# generate the observed data from the true ("cubic") model plus Gaussian noise
finterp1 = interp1d(xparameters, parameters, kind="cubic")
xdata = np.linspace(0, 1, ndata)
ydata = finterp1(xdata) + np.random.randn(*xdata.shape) * sigma_noise
plt.plot(xdata, ydata, '.')
plt.show()
This is the data we observe (as an example).
Assume that we are in an ideal world where we know the true model and we also know the noise level. The Bayesian inference of the parameters, $\mathbf{p}$, given the data, $\mathcal{D}$, can be written as
$$\begin{equation} P(\mathbf{p} | \mathcal{D}) \propto P(\mathcal{D} | \mathbf{p}) P(\mathbf{p}). \end{equation}$$Taking a uniform prior for the parameters and a Gaussian likelihood, we get
$$\begin{equation} P(\mathbf{p} | \mathcal{D}) \propto \exp\left[-\sum_i\frac{\left(\hat{y}_i - y_i(\mathbf{p})\right)^2}{2\sigma^2}\right]. \end{equation}$$We can use the emcee module to draw samples from this posterior.
In [3]:
def get_ymodel1(xparams, params, xdata):
    # the true ("cubic") model: cubic spline interpolation of the parameters
    finterp1 = interp1d(xparams, params, kind="cubic")
    return finterp1(xdata)

def lnprob1(params, xparams, xdata, ydata):
    # Gaussian log-likelihood (up to a constant) with a known noise level
    ymodel = get_ymodel1(xparams, params, xdata)
    sigma = 0.05 # known noise level
    return -np.sum((ydata - ymodel)**2) / (2 * sigma**2)
In [4]:
ndim, nwalkers = 4, 100
p0 = np.random.random((nwalkers, ndim))
sampler1 = emcee.EnsembleSampler(nwalkers, ndim, lnprob1, args=[xparameters, xdata, ydata])
_ = sampler1.run_mcmc(p0, 1000)
In [5]:
nburn = 10000
params1 = sampler1.flatchain[nburn:,:]
plt.figure(figsize=(12,4))
for i in range(ndim):
    plt.subplot(1, ndim, i+1)
    ax = plt.gca()
    plt.hist(params1[:,i], 50, density=True, range=parameters[i] + np.array([-1, 1])*0.05)
    ylim = ax.get_ylim()
    plt.plot(np.zeros(2) + parameters[i], ylim, 'k--')  # true value
    ax.set_ylim(ylim)
    plt.title("parameter #%d" % (i+1))
plt.show()

for i in range(ndim):
    # the quoted error is the width of the 16th-84th percentile interval
    print("parameter #%d: %f +- %f (true: %f)" %
          (i+1, np.median(params1[:,i]),
           np.percentile(params1[:,i], 84) - np.percentile(params1[:,i], 16),
           parameters[i]))
In [6]:
plt.figure(figsize=(8,4))
plt.plot(xdata, ydata, '.')
plt.plot(xdata, get_ymodel1(xparameters, np.median(params1, 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel1(xparameters, np.percentile(params1, 16, axis=0), xdata),
get_ymodel1(xparameters, np.percentile(params1, 84, axis=0), xdata), facecolor="orange")
plt.show()
As we can see, the retrieved parameters (i.e. the median) are very close to the true values.
Now we move to a slightly less ideal world where we use an approximate model with a known noise level. The approximate models here use linear and quadratic spline interpolation (the "linear" and "quadratic" models) instead of cubic spline interpolation (the "cubic" model).
The inference is the same as before; we only change how the model data are calculated in the lnprob1 function.
In [7]:
def get_ymodel2(xparams, params, xdata):
    # approximate model: linear spline interpolation
    finterp1 = interp1d(xparams, params, kind="linear")
    return finterp1(xdata)

def get_ymodel3(xparams, params, xdata):
    # approximate model: quadratic spline interpolation
    finterp1 = interp1d(xparams, params, kind="quadratic")
    return finterp1(xdata)

def lnprob2(params, xparams, xdata, ydata):
    ymodel = get_ymodel2(xparams, params, xdata)
    sigma = 0.05 # known noise level
    return -np.sum((ydata - ymodel)**2) / (2 * sigma**2)

def lnprob3(params, xparams, xdata, ydata):
    ymodel = get_ymodel3(xparams, params, xdata)
    sigma = 0.05 # known noise level
    return -np.sum((ydata - ymodel)**2) / (2 * sigma**2)
In [8]:
ndim, nwalkers = 4, 100
p0 = np.random.random((nwalkers, ndim))
sampler2 = emcee.EnsembleSampler(nwalkers, ndim, lnprob2, args=[xparameters, xdata, ydata])
_ = sampler2.run_mcmc(p0, 1000)
sampler3 = emcee.EnsembleSampler(nwalkers, ndim, lnprob3, args=[xparameters, xdata, ydata])
_ = sampler3.run_mcmc(p0, 1000)
In [9]:
nburn = 10000
params2 = sampler2.flatchain[nburn:,:]
params3 = sampler3.flatchain[nburn:,:]
plt.figure(figsize=(16,6))
for i in range(ndim):
    for j in range(2):
        paramss = params2 if j == 0 else params3
        plt.subplot(2, ndim, j*ndim+i+1)
        ax = plt.gca()
        plt.hist(paramss[:,i], 50, density=True, range=parameters[i] + np.array([-1, 1])*0.2)
        ylim = ax.get_ylim()
        plt.plot(np.zeros(2) + parameters[i], ylim, 'k--')  # true value
        ax.set_ylim(ylim)
        if j == 0:
            plt.title("parameter #%d" % (i+1))
        if i == 0:
            plt.ylabel("%s model" % ("linear" if j == 0 else "quadratic"))
plt.show()

for j in range(2):
    print("%s model" % ("linear" if j == 0 else "quadratic"))
    paramss = params2 if j == 0 else params3
    for i in range(ndim):
        print("parameter #%d: %f +- %f (true: %f)" %
              (i+1, np.median(paramss[:,i]),
               np.percentile(paramss[:,i], 84) - np.percentile(paramss[:,i], 16),
               parameters[i]))
In [10]:
plt.figure(figsize=(16,4))
plt.subplot(1,2,1)
plt.plot(xdata, ydata, '.')
plt.plot(xdata, get_ymodel2(xparameters, np.median(params2, 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel2(xparameters, np.percentile(params2, 16, axis=0), xdata),
get_ymodel2(xparameters, np.percentile(params2, 84, axis=0), xdata), facecolor="orange")
plt.subplot(1,2,2)
plt.plot(xdata, ydata, '.')
plt.plot(xdata, get_ymodel3(xparameters, np.median(params3, 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel3(xparameters, np.percentile(params3, 16, axis=0), xdata),
get_ymodel3(xparameters, np.percentile(params3, 84, axis=0), xdata), facecolor="orange")
plt.show()
As we can see here, if we use approximate models, the retrieved values of the parameters are off while the reported errors remain small. In other words, the inference is confident, but wrong. The less accurate the model, the worse the effect: the retrieved values are further off while the errors stay small.
The problem with this inference is that it assumes the data differ from the model only by Gaussian noise. This is not true here, since the data also differ because of the model inadequacy.
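We can check this systematic difference directly. The following quick sketch (reusing the variables defined in the cells above; best_linear, true_curve and inadequacy are just names introduced here) compares the best-fit linear model with the true cubic model and prints the size of the model-inadequacy term next to the noise level:
In [ ]:
# sketch: size of the model inadequacy for the best-fit linear model,
# compared with the noise level (uses variables from the cells above)
best_linear = get_ymodel2(xparameters, np.median(params2, 0), xdata)
true_curve = get_ymodel1(xparameters, parameters, xdata)
inadequacy = best_linear - true_curve
print("max |model inadequacy|: %.3f" % np.max(np.abs(inadequacy)))
print("noise level           : %.3f" % sigma_noise)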
In my latest paper, I assumed the model inadequacy to be a stochastic variable with distribution
$$\begin{equation} P(\eta(x)) \propto \exp\left[-\frac{\max_i\left(\hat{y}_i - y_i(\mathbf{p})\right)^2}{2d^2}\right] \end{equation}$$with the value of $d$ approximately equal to the noise level.
Unfortunately, I did not take the Gaussian noise into account, so the distribution above is used directly as the likelihood of observing the data.
In [11]:
def lnprob2_model1(params, xparams, xdata, ydata):
    ymodel = get_ymodel2(xparams, params, xdata)
    d = 0.05 # d approximately the noise level
    return -np.max((ydata - ymodel)**2) / (2 * d**2)

def lnprob3_model1(params, xparams, xdata, ydata):
    ymodel = get_ymodel3(xparams, params, xdata)
    d = 0.05 # d approximately the noise level
    return -np.max((ydata - ymodel)**2) / (2 * d**2)
In [12]:
ndim, nwalkers = 4, 100
p0 = np.random.random((nwalkers, ndim))
sampler2_model1 = emcee.EnsembleSampler(nwalkers, ndim, lnprob2_model1, args=[xparameters, xdata, ydata])
_ = sampler2_model1.run_mcmc(p0, 1000)
sampler3_model1 = emcee.EnsembleSampler(nwalkers, ndim, lnprob3_model1, args=[xparameters, xdata, ydata])
_ = sampler3_model1.run_mcmc(p0, 1000)
In [13]:
nburn = 10000
params2_model1 = sampler2_model1.flatchain[nburn:,:]
params3_model1 = sampler3_model1.flatchain[nburn:,:]
plt.figure(figsize=(16,6))
for i in range(ndim):
    for j in range(2):
        paramss = params2_model1 if j == 0 else params3_model1
        plt.subplot(2, ndim, j*ndim+i+1)
        ax = plt.gca()
        plt.hist(paramss[:,i], 50, density=True, range=parameters[i] + np.array([-1, 1])*0.2)
        ylim = ax.get_ylim()
        plt.plot(np.zeros(2) + parameters[i], ylim, 'k--')  # true value
        ax.set_ylim(ylim)
        if j == 0:
            plt.title("parameter #%d" % (i+1))
        if i == 0:
            plt.ylabel("%s model" % ("linear" if j == 0 else "quadratic"))
plt.show()

for j in range(2):
    print("%s model" % ("linear" if j == 0 else "quadratic"))
    paramss = params2_model1 if j == 0 else params3_model1
    for i in range(ndim):
        print("parameter #%d: %f +- %f (true: %f)" %
              (i+1, np.median(paramss[:,i]),
               np.percentile(paramss[:,i], 84) - np.percentile(paramss[:,i], 16),
               parameters[i]))
In [15]:
plt.figure(figsize=(16,4))
plt.subplot(1,2,1)
plt.plot(xdata, ydata, '.', alpha=0.3)
plt.plot(xdata, get_ymodel2(xparameters, np.median(params2_model1, 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel2(xparameters, np.percentile(params2_model1, 16, axis=0), xdata),
get_ymodel2(xparameters, np.percentile(params2_model1, 84, axis=0), xdata),
facecolor="orange", alpha=0.5)
plt.subplot(1,2,2)
plt.plot(xdata, ydata, '.', alpha=0.3)
plt.plot(xdata, get_ymodel3(xparameters, np.median(params3_model1, 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel3(xparameters, np.percentile(params3_model1, 16, axis=0), xdata),
get_ymodel3(xparameters, np.percentile(params3_model1, 84, axis=0), xdata),
facecolor="orange", alpha=0.5)
plt.plot(xdata, get_ymodel1(xparameters, parameters, xdata), '-', linewidth=2)
plt.show()
From the results above, this way of modelling the model inadequacy works well if the model is reasonably accurate. If the model is far from the true model, the retrieved values are still far off compared with their standard deviations. Moreover, it requires us to make an assumption about how right (or wrong) the model is by setting the value of $d$ manually, as illustrated below.
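As a rough illustration of that dependence (a sketch reusing the variables from the cells above; best2 and d_try are just names introduced here), the max-residual log-likelihood at the best-fit parameters scales as $1/d^2$, so the assumed value of $d$ directly controls how sharp the posterior is:
In [ ]:
# sketch: how the max-residual log-likelihood depends on the assumed d
# (uses xparameters, xdata, ydata and params2_model1 from the cells above)
best2 = np.median(params2_model1, 0)
resid = get_ymodel2(xparameters, best2, xdata) - ydata
for d_try in [0.02, 0.05, 0.1, 0.2]:
    print("d = %.2f -> lnprob = %.1f" % (d_try, -np.max(resid**2) / (2 * d_try**2)))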
What we want is minimal input from the user, with the error on the retrieved parameters adjusted automatically: if the model is less accurate, the error should be larger; if it is more accurate, the error should be smaller.
Another way to model the model inadequacy is using a Gaussian process.
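Concretely, in the cells below the residuals between the observed data and the model are treated as a draw from a zero-mean Gaussian process with a squared-exponential kernel plus the observation noise, with the kernel amplitude $m$ and length scale $d$ sampled alongside the model parameters (the extra $-\ln(\sigma m d)$ term in the code acts as a scale-invariant prior on the scale parameters):
$$\begin{equation} P(\mathcal{D} | \mathbf{p}, m, d) = \mathcal{N}\left(\mathbf{\hat{y}} - \mathbf{y}(\mathbf{p});\ \mathbf{0},\ \mathbf{K} + \sigma^2\mathbf{I}\right), \quad K_{ij} = m^2 \exp\left[-\frac{(x_i - x_j)^2}{2d^2}\right]. \end{equation}$$If the model is inadequate, the sampler can increase $m$ to absorb the systematic residuals, which in turn widens the posterior of the model parameters.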
In [28]:
from scipy.stats import multivariate_normal

def lnprob2_model2(params, xparams, xdata, ydata, dist):
    # the last nhparams entries of params are hyperparameters:
    # params[-1] = d (kernel length scale), params[-2] = m (kernel amplitude);
    # params[-3] is unused here because the noise level sigma is kept fixed
    d = params[-1]
    m = params[-2]
    sigma = 0.05
    if d > 1 or d < 0 or m < 0 or sigma < 0: return -np.inf
    ncut = 10 # subsample the data to keep the covariance matrix manageable
    cov = m*m * np.exp(-dist[::ncut,::ncut]**2/(2*d*d)) + np.eye(len(xdata[::ncut])) * (sigma*sigma + 1e-7)
    ymodel = get_ymodel2(xparams, params[:-3], xdata)
    obs = ymodel - ydata
    obs = obs[::ncut]
    try:
        return multivariate_normal.logpdf(obs, mean=np.zeros(obs.shape[0]), cov=cov) - np.log(sigma * m * d)
    except Exception:
        return -np.inf

def lnprob3_model2(params, xparams, xdata, ydata, dist):
    d = params[-1]
    m = params[-2]
    sigma = 0.05
    if d > 1 or d < 0 or m < 0 or sigma < 0: return -np.inf
    ncut = 10
    cov = m*m * np.exp(-dist[::ncut,::ncut]**2/(2*d*d)) + np.eye(len(xdata[::ncut])) * (sigma*sigma + 1e-7)
    ymodel = get_ymodel3(xparams, params[:-3], xdata)
    obs = ymodel - ydata
    obs = obs[::ncut]
    try:
        return multivariate_normal.logpdf(obs, mean=np.zeros(obs.shape[0]), cov=cov) - np.log(sigma * m * d)
    except Exception:
        return -np.inf
In [29]:
import scipy.spatial.distance
# pairwise distances |x_i - x_j| between the data points, used in the GP kernel
dist = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(np.expand_dims(xdata, 1)))
In [30]:
ndim, nwalkers = 4, 100
nhparams = 3
p0 = np.random.random((nwalkers, ndim+nhparams))
sampler2_model2 = emcee.EnsembleSampler(nwalkers, ndim+nhparams, lnprob2_model2, args=[xparameters, xdata, ydata, dist])
%time _ = sampler2_model2.run_mcmc(p0, 1000)
sampler3_model2 = emcee.EnsembleSampler(nwalkers, ndim+nhparams, lnprob3_model2, args=[xparameters, xdata, ydata, dist])
%time _ = sampler3_model2.run_mcmc(p0, 1000)
In [ ]:
nburn = 10000
params2_model2 = sampler2_model2.flatchain[nburn:,:]
params3_model2 = sampler3_model2.flatchain[nburn:,:]
plt.figure(figsize=(16,6))
for i in range(ndim):
    for j in range(2):
        paramss = params2_model2 if j == 0 else params3_model2
        plt.subplot(2, ndim, j*ndim+i+1)
        ax = plt.gca()
        plt.hist(paramss[:,i], 50, density=True, range=parameters[i] + np.array([-1, 1])*2)
        ylim = ax.get_ylim()
        plt.plot(np.zeros(2) + parameters[i], ylim, 'k--')  # true value
        ax.set_ylim(ylim)
        if j == 0:
            plt.title("parameter #%d" % (i+1))
        if i == 0:
            plt.ylabel("%s model" % ("linear" if j == 0 else "quadratic"))
plt.show()

for j in range(2):
    print("%s model" % ("linear" if j == 0 else "quadratic"))
    paramss = params2_model2 if j == 0 else params3_model2
    for i in range(ndim):
        print("parameter #%d: %f +- %f (true: %f)" %
              (i+1, np.median(paramss[:,i]),
               np.percentile(paramss[:,i], 84) - np.percentile(paramss[:,i], 16),
               parameters[i]))
In [ ]:
plt.figure(figsize=(16,4))
plt.subplot(1,2,1)
plt.plot(xdata, ydata, '.', alpha=0.3)
plt.plot(xdata, get_ymodel2(xparameters, np.median(params2_model2[:,:-nhparams], 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel2(xparameters, np.percentile(params2_model2[:,:-nhparams], 16, axis=0), xdata),
get_ymodel2(xparameters, np.percentile(params2_model2[:,:-nhparams], 84, axis=0), xdata),
facecolor="orange", alpha=0.5)
plt.subplot(1,2,2)
plt.plot(xdata, ydata, '.', alpha=0.3)
plt.plot(xdata, get_ymodel3(xparameters, np.median(params3_model2[:,:-nhparams], 0), xdata), '-', linewidth=2)
# unseen on the graph because it is too narrow
plt.fill_between(xdata,
get_ymodel3(xparameters, np.percentile(params3_model2[:,:-nhparams], 16, axis=0), xdata),
get_ymodel3(xparameters, np.percentile(params3_model2[:,:-nhparams], 84, axis=0), xdata),
facecolor="orange", alpha=0.5)
plt.plot(xdata, get_ymodel1(xparameters, parameters, xdata), '-', linewidth=2)
plt.show()