In [1]:
#%load /Users/gully/astroML/bombcat/DFM_GP_example.py

Author: gully

Date: Jan 9, 2015

Experiments with Scipy.optimize functions

Originals available at: https://github.com/dfm/gp-tutorial & https://speakerdeck.com/dfm/an-astronomers-introduction-to-gaussian-processes


In [41]:
%pylab inline
# Auto imports np and plt
# NOTE(review): %pylab wildcard-imports numpy/pylab names and clobbers others
# (see the warning below about 'hist' and 'bivariate_normal'). The modern idiom
# is %matplotlib inline plus explicit imports, but later cells rely on pylab
# names (e.g. the bare `copy` in In[60]), so this is left as-is here.


Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['hist', 'bivariate_normal']
`%matplotlib` prevents importing * from pylab and numpy

In [42]:
from matplotlib import rcParams
#rcParams["savefig.dpi"] = 150
import emcee  # http://dan.iel.fm/emcee
import triangle  # https://github.com/dfm/triangle.py
import numpy as np
import matplotlib.pyplot as plt
from astroML.plotting import hist
from astroML.stats.random import bivariate_normal
from matplotlib.patches import Ellipse
import timeit
#from IPython.display import display, Math, Latex

In [43]:
import seaborn as sns
# Larger fonts and thicker lines for readable notebook figures.
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
# Sequential colormap on a light background, for later density plots.
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
# Render the palette swatches inline as a quick visual sanity check.
sns.palplot(sns.cubehelix_palette(light=1))


Non-linear physical optics example


In [44]:
# Explicit imports instead of `from etalon import *`: the wildcard hides where
# names come from and risks clobbering (as the %pylab import already did above).
# These four are the only etalon names used in this notebook.
from etalon import T_gap_Si, T_gap_Si_withFF, T_gap_Si_withFF_fast, sellmeier_Si

np.random.seed(123456)  # fixed seed: the synthetic data set is reproducible

# Introduce the True data
x = np.arange(1200.0, 2501.0, 10.0)  # wavelength grid
N = len(x)

d_True = 4120.0  # true gap depth
f_True = 0.045   # true fill factor
T_DSP = T_gap_Si(x, 0.0)  # zero-gap reference transmission used as normalization
y_True = T_gap_Si_withFF(x, d_True, f_True)/T_DSP

#Introduce some noise with both measurement uncertainties 
#   and non-trivial correlated errors.

yerr = 0.0001 + 0.0003*np.random.rand(N)  # heteroscedastic per-point errors
yerr_hom = 0.0002*np.ones(N)              # homoscedastic alternative (unused below)
hom_cov = np.diag(yerr_hom ** 2)
iid_cov = np.diag(yerr ** 2)

# Squared-exponential correlated noise: amplitude a_True, length scale s_True,
# added on top of the iid measurement covariance.
a_True = 0.002
s_True = 12.0
true_cov = a_True**2.0 * np.exp(-0.5 * (x[:, None]-x[None, :])**2 / s_True**2) + iid_cov
y = np.random.multivariate_normal(y_True, true_cov)

Plot III: Data vs 'truth'

And plot the data (black points) with their observational uncertainties. The true model is overplotted as a red line.


In [45]:
# Plot the noisy data (black points with error bars) against the noiseless
# true model (red line).
plt.errorbar(x, y, yerr=yerr, fmt=".k", capsize=0)
plt.plot(x, y_True, "-r", lw=2, alpha=0.3)
# Raw string: '\l' in '$\lambda$' is an invalid escape sequence in a normal
# string literal (DeprecationWarning on modern Python).
plt.xlabel(r'$\lambda$');
plt.ylabel('$T_{gap}$');
plt.title('Synthetic Si Transmission');
plt.ylim(0.9, 1.01);



In [45]:

The following example follows from the Enthought Training on Demand videos.

https://training.enthought.com/course/SCIPY/lecture/SCIPY_CURVE_FITTING_CALLABLES


In [15]:
# Refractive index of Si on the wavelength grid (Sellmeier dispersion relation).
n1 = sellmeier_Si(x)

In [21]:
# Arguments are parameters, y, x
def f_err(p, y, x, n):
    res = y - T_gap_Si_withFF_fast(x, p[0], p[1], n)
    return res

In [22]:
import scipy.optimize as optimize

In [27]:
# Initial guess: small perturbation of the known truth (a generous head start).
guess = [d_True*0.995, f_True*1.01]

In [28]:
# Legacy MINPACK wrapper; ret_val is an integer status flag (1-4 mean "found
# a solution" — it does NOT guarantee the solution is physically sensible).
c, ret_val = optimize.leastsq(f_err, guess, args=(y, x, n1))

In [29]:
# leastsq status flag (1 = converged by its own criteria).
ret_val


Out[29]:
1

In [30]:
# Best-fit (d, f) — note the unphysical negative fill factor f ~ -1.88.
c


Out[30]:
array([  4.49347813e+03,  -1.88206243e+00])

In [68]:
# NOTE(review): the model passed here is T_gap_Si_withFF alone, but `y` was
# generated normalized by T_DSP (see In[44]) — presumably this mismatch is
# what drives the fit to an unphysical solution; confirm before trusting p_est.
p_est, err_est = optimize.curve_fit(T_gap_Si_withFF, x, y, p0=(4115.0, 0.048))

In [69]:
# curve_fit lands on essentially the same unphysical solution as leastsq.
p_est


Out[69]:
array([  4.49347976e+03,  -1.88206273e+00])

The fit is a failure: both `leastsq` and `curve_fit` converge to an unphysical solution (negative fill factor, $f \approx -1.88$) far from the true values.

Constructing a guess at the covariance matrix

The off-diagonal terms are characterized by parameters $a$ and $s$.

The term $a$ controls the strength of the interaction. The term $s$ controls the range of correlation.

First we define a natural log likelihood function. You hand it a $d$, a $f$, an $a$, and an $s$ and it gives you the likelihood of the data: $\ln{p}=\ln{p(y|d,f,a,s)}$

Note: x is not passed to this function. Python resolves names by searching the local scope first, then any enclosing scopes, then the global (module) namespace, then builtins — so x, y, and iid_cov are found in the notebook's global namespace.


In [46]:
# Recompute the Si refractive index (duplicates In[15]; harmless but redundant).
n1 = sellmeier_Si(x)

In [47]:
def lnlike(d, f):
    """Gaussian log-likelihood of the data given gap depth d and fill factor f.

    Reads x, y, n1, T_DSP, and iid_cov from the enclosing (global) namespace.
    Returns -inf if the covariance is not positive definite.
    """
    cov = iid_cov
    sign, log_det = np.linalg.slogdet(cov)
    if sign <= 0:
        # Covariance is singular or not positive definite: zero likelihood.
        return -np.inf
    resid = y - T_gap_Si_withFF_fast(x, d, f, n1) / T_DSP
    chi2 = np.dot(resid, np.linalg.solve(cov, resid))
    return -0.5 * (chi2 + log_det)

In [48]:
# Uniform (box) prior on (d, f): log-prior is 0 inside the box, -inf outside.
def lnprior(d, f):
    """Return 0.0 if (d, f) lies inside the allowed box
    (3900 < d < 4300, 0 < f < 1), else -inf."""
    d_ok = 3900 < d < 4300
    f_ok = 0 < f < 1
    return 0.0 if (d_ok and f_ok) else -np.inf

In [49]:
def lnprob(p):
    """Log-posterior for parameter vector p = (d, f): prior plus likelihood.

    Returns -inf without evaluating the likelihood when the prior excludes p.
    """
    prior = lnprior(*p)
    if np.isfinite(prior):
        return prior + lnlike(*p)
    return -np.inf

Run emcee!


In [50]:
# 32 walkers exploring the 2-D (d, f) parameter space.
ndim, nwalkers = 2, 32
p0 = np.array([d_True, f_True]) # Excellent guess
# Initialize each walker in a tight (0.1% relative) Gaussian ball around p0.
pos = [p0 + 1.0e-3*p0 * np.random.randn(ndim) for i in range(nwalkers)]

sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)

In [57]:
# This is the burn-in
# 300 steps so the walkers forget their initialization.
pos, lp, state = sampler.run_mcmc(pos, 300)

In [58]:
# Discard the burn-in chain, then run 1000 production steps starting from
# the final burn-in positions.
sampler.reset()
pos, lp, state = sampler.run_mcmc(pos, 1000)
chain = sampler.chain  # indexed as [walker, iteration, parameter] below

In [59]:
# Trace plots of the chains for d (top) and f (bottom).
fig, axes = plt.subplots(2, 1, figsize=(5, 6), sharex=True)
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.96, top=0.98,
                    wspace=0.0, hspace=0.05)
# Plain loops instead of list comprehensions: comprehensions evaluated only
# for their side effects are unidiomatic and build throwaway lists.
for i, (ax, label) in enumerate(zip(axes, ["d", "f"])):
    ax.plot(np.arange(chain.shape[1]), chain[:, :, i].T, "k", alpha=0.5)
    ax.set_ylabel("${0}$".format(label))
axes[-1].set_xlim(0, chain.shape[1])
axes[-1].set_xlabel("iteration");



In [60]:
# Flattened (walkers x steps, ndim) posterior samples.
samples = sampler.flatchain
# Use ndarray.copy() rather than the bare `copy` that %pylab injected into
# the namespace — explicit, and robust if the pylab import is ever removed.
samples_lin = samples.copy()

In [61]:
# Corner plot of the 2-D posterior with the true values marked.
# Explicit label list: on Python 3, map() returns a one-shot iterator, which
# may not behave as the sequence the plotting call expects.
fig = triangle.corner(samples_lin,
                      labels=["$d$", "$f$"],
                      truths=[d_True, f_True])



In [71]:
# Overlay 100 posterior draws on the data: the spread of the thin blue
# curves visualizes parameter uncertainty in model space.
for d, f in samples[np.random.randint(len(samples), size=100)]:
    plt.plot(x, T_gap_Si_withFF_fast(x, d, f, n1)/T_DSP, color="b", alpha=0.03)

plt.errorbar(x, y, yerr=yerr, fmt=".k", capsize=0)
plt.plot(x, y_True, "--r", lw=2, alpha=1.0)
# Raw string: '\l' is an invalid escape in a normal string literal.
plt.xlabel(r'$\lambda$');
plt.ylabel('$T_{gap}$');
plt.title('100 samples from posterior');
plt.ylim(0.9, 1.01);



In [64]:
# Turn the per-parameter [16, 50, 84] percentiles into
# (median, +1sigma, -1sigma) summary tuples for d and f.
q16, q50, q84 = np.percentile(samples_lin, [16, 50, 84], axis=0)
d_mcmc, f_mcmc = [(med, hi - med, med - lo)
                  for lo, med, hi in zip(q16, q50, q84)]

In [65]:
# Display (median, +err, -err) for d and f.
d_mcmc, f_mcmc


Out[65]:
((4120.6544036936675, 0.23887836994072131, 0.23512448507790396),
 (0.045378338182609342, 4.0953619070277503e-05, 4.119209296057913e-05))

In [67]:
# Posterior predictive check: for 30 posterior samples, draw a noisy
# realization of the model from the iid error covariance and overplot it.
C = iid_cov  # loop-invariant: hoisted out of the loop
for d, f in samples_lin[np.random.randint(len(samples), size=30)]:
    fit = T_gap_Si_withFF_fast(x, d, f, n1)/T_DSP
    vec = np.random.multivariate_normal(fit, C)
    plt.plot(x, vec, color="g", alpha=0.03)

plt.errorbar(x, y, fmt=".k")
# Raw string: '\l' is an invalid escape in a normal string literal.
plt.xlabel(r'$\lambda$');
plt.ylabel('$T_{gap}$');
plt.title('Synthetic Si Transmission');
plt.ylim(0.9, 1.01);



In [ ]: