This IPython Notebook is for performing a fit and generating a figure of the spectrum of sample VG02, in a region demonstrating a good contacting, with negligible bond gap.

The filename of the figure is VG02.pdf.

Author: Michael Gully-Santiago, gully@astro.as.utexas.edu

Date: January 25, 2015



In [ ]:

    
%pylab inline
import emcee
import triangle
import pandas as pd
import seaborn as sns
from astroML.decorators import pickle_results



In [2]:

    
sns.set_context("paper", font_scale=2.0, rc={"lines.linewidth": 2.5})
sns.set(style="ticks")

Read in the data. We want "VG02_pos2" or "VG02_pos3"



In [3]:

    
df = pd.read_csv('../data/cln_20130218_cary5000.csv', index_col=0)
df.head()









    Out[3]:






  
    
      
      Baseline 100%T
      Baseline 0%T
      VG05
      VG01
      VG06
      VG02
      VG02_pos2
      VG02_pos3
      VG02_pos4
      VG03_pos1
      VG03_pos2
      VG04_pos1
      VG04_pos2
      VG06_post
      Baseline 100%T.1
      Baseline 0%T.1
      VG05_post
    
    
      wavelength
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2500
       7.427119
      -0.001234
       53.442204
       53.466736
       53.262798
       36.737545
       52.972309
       53.064991
       33.512653
       38.421932
       52.000980
       25.861622
       54.211266
       53.938526
       7.585160
       0.000067
       53.521866
    
    
      2490
       7.814320
      -0.001347
       53.430767
       53.484589
       53.288322
       36.678280
       53.001614
       53.104568
       33.588028
       37.254417
       52.001225
       25.784100
       54.293777
       53.990936
       7.991349
       0.000207
       53.393871
    
    
      2480
       8.378716
      -0.000838
       53.504936
       53.481739
       53.299366
       36.473946
       53.025131
       53.119553
       33.557705
       36.035366
       51.969082
       25.835318
       54.253708
       53.963161
       8.566405
       0.000034
       53.493465
    
    
      2470
       8.825397
      -0.001670
       53.377266
       53.474129
       53.299530
       36.204262
       53.002007
       53.103672
       33.459511
       34.838486
       51.941597
       25.706764
       54.266666
       53.970634
       9.022456
      -0.000127
       53.406708
    
    
      2460
       9.592031
      -0.001384
       53.376835
       53.469803
       53.290367
       35.955139
       53.008461
       53.092026
       33.415165
       33.569817
       51.898754
       25.675592
       54.245071
       53.950542
       9.799633
      -0.000737
       53.352882



In [4]:

    
df = df[df.index > 1250.0]

Import all the local models, saved locally as etalon.py. See the paper for derivations of these equations.



In [5]:

    
from etalon import *
np.random.seed(78704) #My old zip code

Determine the individual measurement uncertainty by taking the standard deviation of four mean-substracted reference measurements.



In [6]:

    
arr = np.vstack([df.VG05-df.VG05.mean(axis=0),
                 df.VG05_post-df.VG05_post.mean(axis=0),
                 df.VG06-df.VG06.mean(axis=0),
                 df.VG06_post-df.VG06_post.mean(axis=0)])



In [7]:

    
arr = np.vstack([df.VG05-df.VG05.mean(axis=0),
                 df.VG05_post-df.VG05_post.mean(axis=0),
                 df.VG06-df.VG06.mean(axis=0),
                 df.VG06_post-df.VG06_post.mean(axis=0)])

print arr.shape
arr_norm = arr - arr.mean(axis=0)
er = np.std(arr_norm, axis=0)/100.0
print er.shape









    



(4, 125)
(125,)



In [8]:

    
# Introduce the Real data
x = df.index.values
N = len(x)
# Define T_DSP for the model
T_DSP = T_gap_Si(x, 0.0)
n1 = sellmeier_Si(x)

# Define uncertainty
yerr = er# 0.0002*np.ones(N)
iid_cov = np.diag(yerr ** 2)

# Select the spectrum of interest
# Normalize the spectrum by measured DSP Si wafer.
y = df.VG02_pos3/(0.25*(df.VG05+df.VG05+df.VG06+df.VG06_post))



In [11]:

    
plot(x, er)









    Out[11]:





[<matplotlib.lines.Line2D at 0x1099cd150>]



In [12]:

    
np.median(er)









    Out[12]:





0.00028299047858754443



In [42]:

    
#np.savetxt("hetroscedastic.txt",er, delimiter=',', header='x, y')



In [13]:

    
errorbar(x,y, yerr=er)
plt.ylim(0.98, 1.01)









    Out[13]:





(0.98, 1.01)

Define the likelihood.



In [14]:

    
def lnlike(d, f, lna, lns):
    a, s = np.exp(lna), np.exp(lns)
    off_diag_terms = a**2 * np.exp(-0.5 * (x[:, None] - x[None, :])**2 / s**2)
    C = iid_cov + off_diag_terms
    sgn, logdet = np.linalg.slogdet(C)
    if sgn <= 0:
        return -np.inf
    r = y - T_gap_Si_withFF_fast(x, d, f, n1)/T_DSP
    return -0.5 * (np.dot(r, np.linalg.solve(C, r)) + logdet)

Define the prior.



In [15]:

    
def lnprior(d, f, lna, lns):
    if not (0 < d < 100 and 0.0 < f < 1.0 and -12 < lna < -2 and 0 < lns < 10):
        return -np.inf
    return 0.0

Combine likelihood and prior to obtain the posterior.



In [16]:

    
def lnprob(p):
    lp = lnprior(*p)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(*p)

Set up emcee.



In [17]:

    
@pickle_results('SiGaps_13_VG02_noGap-sampler.pkl')
def hammer_time(ndim, nwalkers, d_Guess, f_Guess, a_Guess, s_Guess, nburnins, ntrials):
    
    # Initialize the walkers
    p0 = np.array([d_Guess, f_Guess, np.log(a_Guess), np.log(s_Guess)])
    pos = [p0 + 1.0e-2*p0 * np.random.randn(ndim) for i in range(nwalkers)]
    
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
    
    pos, lp, state = sampler.run_mcmc(pos, nburnins)
    sampler.reset()
    pos, lp, state = sampler.run_mcmc(pos, ntrials)
    return sampler

Set up the initial conditions.



In [18]:

    
np.random.seed(78704)
ndim, nwalkers = 4, 32
d_Guess = 15.0
f_Guess = 0.95
a_Guess = 0.0016
s_Guess = 500.0
nburnins = 300
ntrials = 9000

Run the burn-in phase. Run the full MCMC. Pickle the results.



In [19]:

    
sampler = hammer_time(ndim, nwalkers, d_Guess, f_Guess, a_Guess, s_Guess, nburnins, ntrials)









    



@pickle_results: using precomputed results from 'SiGaps_13_VG02_noGap-sampler.pkl'

Linearize $a$ and $s$ for easy inspection of the values.



In [20]:

    
chain = sampler.chain
samples_lin = copy(sampler.flatchain)
samples_lin[:, 2:] = np.exp(samples_lin[:, 2:])

Inspect the chain.



In [21]:

    
fig, axes = plt.subplots(4, 1, figsize=(5, 6), sharex=True)
fig.subplots_adjust(left=0.1, bottom=0.1, right=0.96, top=0.98,
                    wspace=0.0, hspace=0.05)
[a.plot(np.arange(chain.shape[1]), chain[:, :, i].T, "k", alpha=0.5)
 for i, a in enumerate(axes)]
[a.set_ylabel("${0}$".format(l)) for a, l in zip(axes, ["d", "f", "\ln a", "\ln s"])]
axes[-1].set_xlim(0, chain.shape[1])
axes[-1].set_xlabel("iteration");

Make a triangle corner plot.



In [22]:

    
fig = triangle.corner(samples_lin, 
                      labels=map("${0}$".format, ["d", "f", "a", "s"]), 
                      quantiles=[0.16, 0.84])









    



Quantiles:
[(0.16, 9.2917942238417428), (0.84, 40.707619886479556)]
Quantiles:
[(0.16, 0.064084108048825791), (0.84, 0.73836476641700866)]
Quantiles:
[(0.16, 0.0026176884201528023), (0.84, 0.0042281955289535707)]
Quantiles:
[(0.16, 123.1130532646567), (0.84, 143.9008012107891)]



In [23]:

    
fig = triangle.corner(samples_lin[:,0:2], 
                      labels=map("${0}$".format, ["d", "f"]), 
                      quantiles=[0.16, 0.84])
plt.savefig("VG02p3_corner.pdf")









    



Quantiles:
[(0.16, 9.2917942238417428), (0.84, 40.707619886479556)]
Quantiles:
[(0.16, 0.064084108048825791), (0.84, 0.73836476641700866)]






    



/Users/gully/anaconda/lib/python2.7/site-packages/matplotlib/backends/backend_pdf.py:2264: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  different = bool(ours != theirs)

Calculate confidence intervals.



In [24]:

    
d_mcmc, f_mcmc, a_mcmc, s_mcmc = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
                             zip(*np.percentile(samples_lin, [16, 50, 84],
                                                axis=0)))
d_mcmc, f_mcmc, a_mcmc, s_mcmc









    Out[24]:





((17.593240091088759, 23.114379795390796, 8.3014458672470166),
 (0.31743369524697829, 0.42093107117003037, 0.25334958719815248),
 (0.0032783203320604086, 0.00094987519689316213, 0.00066063191190760635),
 (133.09601972805291, 10.804781482736189, 9.9829664633962096))



In [25]:

    
print "{:.0f}^{{+{:.0f}}}_{{-{:.0f}}}".format(*d_mcmc)
print "{:.3f}^{{+{:.3f}}}_{{-{:.3f}}}".format(*f_mcmc)









    



18^{+23}_{-8}
0.317^{+0.421}_{-0.253}

Overlay draws from the Gaussian Process.



In [30]:

    
plt.figure(figsize=(6,3))

for d, f, a, s in samples_lin[np.random.randint(len(samples_lin), size=60)]:
    off_diag_terms = a**2 * np.exp(-0.5 * (x[:, None] - x[None, :])**2 / s**2)
    C = iid_cov + off_diag_terms
    fit = T_gap_Si_withFF_fast(x, d, f, n1)/T_DSP
    vec = np.random.multivariate_normal(fit, C)
    plt.plot(x, vec,"-b", alpha=0.06)

plt.step(x, y,color="k", label='Measurement')
plt.errorbar(x, y,yerr=er, color="k")
fit = T_gap_Si_withFF_fast(x, d_mcmc[0], f_mcmc[0], n1)/T_DSP
fit_label = 'Model with $d={:.0f}$ nm, $f={:.3f}$'.format(d_mcmc[0], f_mcmc[0])
plt.plot(x, fit, '--', color=sns.xkcd_rgb["pale red"], alpha=1.0, label=fit_label)
plt.plot([-10, -9], [-10, -9],"-b", alpha=0.45, label='Draws from GP')
plt.plot([0, 5000], [1.0, 1.0], '-.k', alpha=0.5)
plt.fill_between([1200, 1250], 2.0, 0.0, hatch='\\', alpha=0.4, color='k', label='Si absorption cutoff')

plt.xlabel('$\lambda$ (nm)');
plt.ylabel('$T_{gap}$');
plt.xlim(1200, 2501);
plt.ylim(0.96, 1.019);
plt.legend(loc='lower right')
plt.savefig("VG02_noGap.pdf",  bbox_inches='tight')

The end.



In [ ]:

	Baseline 100%T	Baseline 0%T	VG05	VG01	VG06	VG02	VG02_pos2	VG02_pos3	VG02_pos4	VG03_pos1	VG03_pos2	VG04_pos1	VG04_pos2	VG06_post	Baseline 100%T.1	Baseline 0%T.1	VG05_post
wavelength
2500	7.427119	-0.001234	53.442204	53.466736	53.262798	36.737545	52.972309	53.064991	33.512653	38.421932	52.000980	25.861622	54.211266	53.938526	7.585160	0.000067	53.521866
2490	7.814320	-0.001347	53.430767	53.484589	53.288322	36.678280	53.001614	53.104568	33.588028	37.254417	52.001225	25.784100	54.293777	53.990936	7.991349	0.000207	53.393871
2480	8.378716	-0.000838	53.504936	53.481739	53.299366	36.473946	53.025131	53.119553	33.557705	36.035366	51.969082	25.835318	54.253708	53.963161	8.566405	0.000034	53.493465
2470	8.825397	-0.001670	53.377266	53.474129	53.299530	36.204262	53.002007	53.103672	33.459511	34.838486	51.941597	25.706764	54.266666	53.970634	9.022456	-0.000127	53.406708
2460	9.592031	-0.001384	53.376835	53.469803	53.290367	35.955139	53.008461	53.092026	33.415165	33.569817	51.898754	25.675592	54.245071	53.950542	9.799633	-0.000737	53.352882