This IPython Notebook is for performing a fit and generating a figure of the spectrum of sample VG05, with no gap present.

The filename of the figure is [TBD].pdf.

Author: Michael Gully-Santiago, gully@astro.as.utexas.edu

Date: February 16, 2015


In [1]:
%pylab inline
import emcee
import triangle
import pandas as pd
import seaborn as sns
from astroML.decorators import pickle_results


Populating the interactive namespace from numpy and matplotlib

In [2]:
sns.set_context("paper", font_scale=2.0, rc={"lines.linewidth": 2.5})
sns.set(style="ticks")

In [3]:
from etalon import *
np.random.seed(78704)

In [8]:
df = pd.read_csv('../data/cln_20150206_cary5000.csv', index_col=0)
df.columns


Out[8]:
Index([u'Baseline 100%T', u'empty_1', u'empty_2_1', u'empty_2_2', u'empty_2_3', u'empty_2_4', u'empty_2_5', u'empty_2_6', u'empty_2_7', u'empty_2_8', u'empty_2_9', u'empty_2_10', u'empty_2_11', u'empty_2_12', u'empty_2_13', u'empty_2_14', u'empty_2_15', u'empty_3_1', u'empty_3_2', u'empty_3_3', u'empty_3_4', u'empty_3_5', u'VG05_1_1', u'VG05_1_2', u'VG05_1_3', u'VG05_1_4', u'VG05_1_5', u'Baseline 100%T.1', u'empty_4_1', u'empty_4_2', u'VG09-12_1_0', u'VG09-12_1_2', u'VG09-12_1_4', u'VG09-12_1_6', u'VG09-12_1_8', u'VG09-12_1_10', u'VG09-12_1_12', u'VG09-12_1_14', u'VG09-12_1_16', u'VG09-12_1_18', u'VG09-12_1_20', u'VG09-12_1_22', u'VG09-12_1_24', u'VG09-12_1_26', u'VG09-12_1_28', u'VG09-12_1_30', u'VG09-12_1_32', u'VG09-12_1_34', u'VG09-12_1_36', u'VG09-12_1_38', u'VG09-12_1_40', u'VG09-12_1_42', u'VG09-12_1_44', u'VG09-12_1_46', u'VG09-12_1_48', u'VG09-12_1_50', u'empty_5_1', u'empty_5_2', u'empty_5_3', u'empty_5_4', u'empty_5_5'], dtype='object')

In [9]:
# Introduce the Real data, decimate the data.
x = df.index.values
N = len(x)
# Define T_DSP for the model
T_DSP = T_gap_Si(x, 0.0)
n1 = sellmeier_Si(x)

# Define uncertainty
yerr = 0.0002*np.ones(N)
yerr[(x > 1350) & (x < 1420)] = 0.0005 #higher noise in this region
iid_cov = np.diag(yerr ** 2)

# Select the spectrum of interest
# Normalize the spectrum by measured DSP Si wafer.
y = df['VG05_1_3'].values/df['VG05_1_1'].values

In [13]:
plt.step(x, y)


Out[13]:
[<matplotlib.lines.Line2D at 0x109daabd0>]

Define the likelihood.


In [17]:
def lnlike(d, f, lna, lns):
    a, s = np.exp(lna), np.exp(lns)
    off_diag_terms = a**2 * np.exp(-0.5 * (x[:, None] - x[None, :])**2 / s**2)
    C = iid_cov + off_diag_terms
    sgn, logdet = np.linalg.slogdet(C)
    if sgn <= 0:
        return -np.inf
    r = y - T_gap_Si_withFF_fast(x, d, f, n1)/T_DSP
    return -0.5 * (np.dot(r, np.linalg.solve(C, r)) + logdet)

Define the prior.


In [18]:
def lnprior(d, f, lna, lns):
    if not (0 < d < 100 and 0.0 < f < 1.0 and -12 < lna < -2 and 0 < lns < 10):
        return -np.inf
    return 0.0

Combine likelihood and prior to obtain the posterior.


In [19]:
def lnprob(p):
    lp = lnprior(*p)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(*p)

Set up emcee.


In [20]:
@pickle_results('SiGaps_19_0gap.pkl')
def hammer_time(ndim, nwalkers, d_Guess, f_Guess, a_Guess, s_Guess, nburnins, ntrials):
    
    # Initialize the walkers
    p0 = np.array([d_Guess, f_Guess, np.log(a_Guess), np.log(s_Guess)])
    pos = [p0 + 1.0e-2*p0 * np.random.randn(ndim) for i in range(nwalkers)]
    
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
    
    pos, lp, state = sampler.run_mcmc(pos, nburnins)
    sampler.reset()
    pos, lp, state = sampler.run_mcmc(pos, ntrials)
    return sampler

Set up the initial conditions.


In [30]:
np.random.seed(78704)
ndim, nwalkers = 4, 32
d_Guess = 6.0
f_Guess = 0.98
a_Guess = 0.0008
s_Guess = 800.0
nburnins = 300
ntrials = 5000

Run the burn-in phase. Run the full MCMC. Pickle the results.


In [31]:
sampler = hammer_time(ndim, nwalkers, d_Guess, f_Guess, a_Guess, s_Guess, nburnins, ntrials)


@pickle_results: computing results and saving to 'SiGaps_19_0gap.pkl'
  warning: cache file 'SiGaps_19_0gap.pkl' exists
    - args match:   False
    - kwargs match: True

Linearize $a$ and $s$ for easy inspection of the values.


In [32]:
chain = sampler.chain
samples_lin = copy(sampler.flatchain)
samples_lin[:, 2:] = np.exp(samples_lin[:, 2:])

In [33]:
fig, axes = plt.subplots(4, 1, figsize=(5, 6), sharex=True)
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.96, top=0.98,
                    wspace=0.0, hspace=0.05)
[a.plot(np.arange(chain.shape[1]), chain[:, :, i].T, "k", alpha=0.5)
 for i, a in enumerate(axes)]
[a.set_ylabel("${0}$".format(l)) for a, l in zip(axes, ["d", "f", "\ln a", "\ln s"])]
axes[-1].set_xlim(0, chain.shape[1])
axes[-1].set_xlabel("iteration");


Make a triangle corner plot.


In [34]:
fig = triangle.corner(samples_lin, 
                      labels=map("${0}$".format, ["d", "f", "a", "s"]), 
                      quantiles=[0.16, 0.84])


Quantiles:
[(0.16, 0.8753473089383379), (0.84, 10.684672856113849)]
Quantiles:
[(0.16, 0.030320118183754144), (0.84, 0.71598499079014566)]
Quantiles:
[(0.16, 0.00058255216536366088), (0.84, 0.0026620139854414901)]
Quantiles:
[(0.16, 1723.3448158840886), (0.84, 14765.382739279084)]

In [35]:
fig = triangle.corner(samples_lin[:,0:2], 
                      labels=map("${0}$".format, ["d", "f"]), 
                      quantiles=[0.16, 0.84])
plt.savefig("VG05_0gap_corner.pdf")


Quantiles:
[(0.16, 0.8753473089383379), (0.84, 10.684672856113849)]
Quantiles:
[(0.16, 0.030320118183754144), (0.84, 0.71598499079014566)]

In [36]:
d_mcmc, f_mcmc, a_mcmc, s_mcmc = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
                             zip(*np.percentile(samples_lin, [16, 50, 84],
                                                axis=0)))
d_mcmc, f_mcmc, a_mcmc, s_mcmc


Out[36]:
((3.0824406858993436, 7.6022321702145055, 2.2070933769610059),
 (0.26014348483718536, 0.4558415059529603, 0.22982336665343123),
 (0.0011023607362411602, 0.0015596532492003299, 0.00051980857087749932),
 (5641.2781321955017, 9124.1046070835837, 3917.9333163114134))

In [37]:
print "{:.0f}^{{+{:.0f}}}_{{-{:.0f}}}".format(*d_mcmc)
print "{:.3f}^{{+{:.3f}}}_{{-{:.3f}}}".format(*f_mcmc)


3^{+8}_{-2}
0.260^{+0.456}_{-0.230}

In [38]:
plt.figure(figsize=(4,3))

for d, f, a, s in samples_lin[np.random.randint(len(samples_lin), size=60)]:
    off_diag_terms = a**2 * np.exp(-0.5 * (x[:, None] - x[None, :])**2 / s**2)
    C = iid_cov + off_diag_terms
    fit = T_gap_Si_withFF_fast(x, d, f, n1)/T_DSP
    vec = np.random.multivariate_normal(fit, C)
    plt.plot(x, vec,"-b", alpha=0.06)

plt.step(x, y,color="k", label='Measurement')
plt.errorbar(x, y,yerr=yerr, color="k")
fit = T_gap_Si_withFF_fast(x, d_mcmc[0], f_mcmc[0], n1)/T_DSP
fit_label = 'Model with $d={:.0f}$ nm, $f={:.3f}$'.format(d_mcmc[0], f_mcmc[0])
plt.plot(x, fit, '--', color=sns.xkcd_rgb["pale red"], alpha=1.0, label=fit_label)
plt.plot([-10, -9], [-10, -9],"-b", alpha=0.45, label='Draws from GP')
plt.plot([0, 5000], [1.0, 1.0], '-.k', alpha=0.5)
plt.fill_between([1200, 1250], 2.0, 0.0, hatch='\\', alpha=0.4, color='k', label='Si absorption cutoff')

plt.xlabel('$\lambda$ (nm)');
plt.ylabel('$T_{gap}$');
plt.xlim(1200, 1900);
plt.ylim(0.96, 1.019);
plt.legend(loc='lower right')
plt.savefig("VG05_0gap.pdf",  bbox_inches='tight')



In [41]:
ids = samples_lin[:,1] > 0.8
ids.sum()/(len(ids)*1.0)
np.percentile(samples_lin[ids, 0], [99.5])


Out[41]:
array([ 7.95680109])

The well-calibrated DSP noise spectrum rules out ($>3\sigma$) gap sizes larger than about 8.0 nm over $>80$\% of the measurement area.


In [ ]: