In [ ]:
measurement_id = '17d' # possible values: '7d', '12d', '17d'

Notebook arguments

  • measurement_id (string): Select the measurement. Valid values: '7d', '12d', '17d'.

8-spot kinetics

This notebook executes the realtime-kinetics analysis.

The first cell of this notebook selects which measurement is analyzed. Measurements can be processed one-by-one, by manually running this notebook, or in batch by using the batch execution notebook.

Loading the software


In [ ]:
from IPython.display import display
from pathlib import Path
import pandas as pd
from scipy.stats import linregress

In [ ]:
from fretbursts import *

In [ ]:
sns = init_notebook(fs=14)

In [ ]:
import lmfit; lmfit.__version__

In [ ]:
import phconvert; phconvert.__version__

Selecting a data file


In [ ]:
#filename = OpenFileDialog('C:/Data/Antonio/data/2015-07-29/')
#filename

In [ ]:
from pathlib import Path

In [ ]:
dir_ = r'C:\Data\Antonio\data\8-spot 5samples data\2013-05-15/'

filenames = [str(f) for f in Path(dir_).glob('*.hdf5')]
filenames

In [ ]:
keys = [f.stem.split('_')[0] for f in Path(dir_).glob('*.hdf5')]
keys

In [ ]:
filenames_dict = {k: v for k, v in zip(keys, filenames)}
filenames_dict

In [ ]:
filename = filenames_dict[measurement_id]
filename

Load and process the data:


In [ ]:
d = loader.photon_hdf5(filename)

In [ ]:
d.time_max

Compute background and burst search:


In [ ]:
d.calc_bg(bg.exp_fit, time_s=10, tail_min_us='auto', F_bg=1.7)

Let's take a look at the photon waiting times histograms and at the fitted background rates:


In [ ]:
dplot(d, hist_bg);

In [ ]:
dplot(d, timetrace_bg);

Burst selection and FRET


In [ ]:
%%timeit -n1 -r1
d.burst_search(m=10, F=5, ph_sel=Ph_sel(Dex='DAem'))

In [ ]:
ds = d.select_bursts(select_bursts.size, th1=30)

Selecting bursts by burst size (select_bursts.size)


In [ ]:
dplot(ds, hist_fret, pdf=False);

In [ ]:
dm0 = ds.collapse()

In [ ]:
dplot(dm0, hist_fret, pdf=False);

In [ ]:
weights = None
bext.bursts_fitter(dm0, weights=weights)
dm0.E_fitter.fit_histogram(mfit.factory_two_gaussians(p1_center=0.04, p2_center=0.3), verbose=False)
dplot(dm0, hist_fret, show_model=True, weights=weights);
dm0.E_fitter.params

In [ ]:
params_gauss0 = dm0.E_fitter.params.copy()

Fit


In [ ]:
from scipy import optimize

In [ ]:
params_fixed = dict(
    mu1=float(params_gauss0.p1_center),
    mu2=float(params_gauss0.p2_center),
    sig1=float(params_gauss0.p1_sigma),
    sig2=float(params_gauss0.p2_sigma),
)

In [ ]:
def em_weights_2gauss(x, a2, mu1, mu2, sig1, sig2):
    """Responsibility function for a 2-Gaussian model.
    
    Return 2 arrays of size = x.size: the responsibility of 
    each Gaussian population.
    """
    a1 = 1 - a2
    assert np.abs(a1 + a2 - 1) < 1e-3
    f1 = a1 * gauss_pdf(x, mu1, sig1)
    f2 = a2 * gauss_pdf(x, mu2, sig2)
    γ1 = f1 / (f1 + f2)
    γ2 = f2 / (f1 + f2)
    return γ1, γ2

In [ ]:
def em_fit_2gauss(x, a2_0, params_fixed, print_every=10, max_iter=100, rtol=1e-3):
    a2_new = a2_0
    rel_change = 1
    i = 0
    while rel_change > rtol and i < max_iter:

        # E-step
        γ1, γ2 = em_weights_2gauss(x, a2_new, **params_fixed)
        assert np.allclose(γ1.sum() + γ2.sum(), x.size)

        # M-step
        a2_old = a2_new
        a2_new = γ2.sum()/γ2.size

        # Convergence
        rel_change = np.abs((a2_old - a2_new)/a2_new)
        i += 1
        if (i % print_every) == 0:
            print(i, a2_new, rel_change)
        
    return a2_new, i

In [ ]:
from matplotlib.pylab import normpdf as gauss_pdf

# Model PDF to be maximized
def model_pdf(x, a2, mu1, mu2, sig1, sig2):
    a1 = 1 - a2
    #assert np.abs(a1 + a2 + a3 - 1) < 1e-3
    return (a1 * gauss_pdf(x, mu1, sig1) + 
            a2 * gauss_pdf(x, mu2, sig2))

def func2min_lmfit(params, x):
    a2 = params['a2'].value
    mu1 = params['mu1'].value
    mu2 = params['mu2'].value
    sig1 = params['sig1'].value
    sig2 = params['sig2'].value
    return -np.sqrt(np.log(model_pdf(x, a2, mu1, mu2, sig1, sig2)))

def func2min_scipy(params_fit, params_fixed, x):
    a2 = params_fit
    mu1 = params_fixed['mu1']
    mu2 = params_fixed['mu2']
    sig1 = params_fixed['sig1']
    sig2 = params_fixed['sig2']
    return -np.log(model_pdf(x, a2, mu1, mu2, sig1, sig2)).sum()

# create a set of Parameters
params = lmfit.Parameters()
params.add('a2', value=0.5, min=0)
for k, v in params_fixed.items():
    params.add(k, value=v, vary=False)
$$f(x) = \frac{A}{\sigma\sqrt{2\pi}}\, e^{-\frac{(x - \mu)^2}{2 \sigma^2}}$$$$\log f(x) = \log \frac{A}{\sigma\sqrt{2\pi}}\, e^{-\frac{(x - \mu)^2}{2 \sigma^2}} = \log{A} -\log{\sigma} - \log\sqrt{2\pi} -\frac{(x - \mu)^2}{2 \sigma^2}$$$$w_1 \; f_1(x) + w_2 \; f_2(x) + w_3 \; f_3(x)$$$$\log (w_1 \; f_1(x)) = \log{w_1} + \log{f_1(x)}$$

In [ ]:
x = dm0.E_
x

In [ ]:
res_em = em_fit_2gauss(x, 0.5, params_fixed)
res_em

In [ ]:
#optimize.brute(func2min_scipy, ranges=((0.01, 0.99), (0.01, 0.99)), Ns=101, args=(params, x))

In [ ]:
res = optimize.minimize(func2min_scipy, x0=[0.5], args=(params_fixed, x), method='Nelder-Mead')
res

In [ ]:
res = optimize.minimize(func2min_scipy, x0=[0.5], args=(params_fixed, x), bounds=((0,1),), method='SLSQP')
res

res = optimize.minimize(func2min_scipy, x0=[0.5], args=(params_fixed, x), bounds=((0,1),), method='TNC')
res

In [ ]:
bins = np.arange(-0.1, 1.1, 0.025)
plt.hist(x, bins, histtype='step', lw=2, normed=True);
xx = np.arange(-0.1, 1.1, 0.005)
#plt.plot(xx, model_pdf(xx, params))
plt.plot(xx, model_pdf(xx, a2=res_em[0], **params_fixed))

Kinetics


In [ ]:
mfit.factory_two_gaussians()

In [ ]:
def _kinetics_fit_em(dx, a2_0, params_fixed, **kwargs):
    kwargs = {'max_iter': 100, 'print_every': 101, **kwargs}
    a2, i = em_fit_2gauss(dx.E_, a2_0, params_fixed, **kwargs)
    return a2, i < kwargs['max_iter']

def _kinetics_fit_ll(dx, a2_0, params_fixed, **kwargs):
    kwargs = {'method':'Nelder-Mead', **kwargs}
    res = optimize.minimize(func2min_scipy, x0=[a2_0], args=(params_fixed, dx.E_), 
                            **kwargs)
    return res.x[0], res.success
    
def _kinetics_fit_hist(dx, a2_0, params_fixed):
    E_fitter = bext.bursts_fitter(dx)
    model = mfit.factory_two_gaussians()
    model.set_param_hint('p1_center', value=params_fixed['mu1'], vary=False)
    model.set_param_hint('p2_center', value=params_fixed['mu2'], vary=False)
    model.set_param_hint('p1_sigma', value=params_fixed['sig1'], vary=False)
    model.set_param_hint('p2_sigma', value=params_fixed['sig2'], vary=False)
    E_fitter.fit_histogram(model, verbose=False)
    return (float(E_fitter.params.p2_amplitude), 
            dx.E_fitter.fit_res[0].success)

def kinetics_fit(ds_slices, params_fixed, a2_0=0.5, method='em', **method_kws):
    fit_func = {
        'em': _kinetics_fit_em, 
        'll': _kinetics_fit_ll,
        'hist': _kinetics_fit_hist}
    
    fit_list = []
    for dx in ds_slices:
        a2, success = fit_func[method](dx, a2_0, params_fixed, **method_kws)
        df_i = pd.DataFrame(data=dict(p2_amplitude=a2,
                                      p1_center=params_fixed['mu1'], p2_center=params_fixed['mu2'], 
                                      p1_sigma=params_fixed['sig1'], p2_sigma=params_fixed['sig2'],
                                      tstart=dx.slice_tstart, tstop=dx.slice_tstop, 
                                      tmean=0.5*(dx.slice_tstart + dx.slice_tstop)), 
                            index=[0.5*(dx.slice_tstart + dx.slice_tstop)])
        if not success:
            print('* ', end='', flush=True)
            continue
        
        fit_list.append(df_i)
    print(flush=True)
    return pd.concat(fit_list)

In [ ]:
dsc = ds.collapse()

In [ ]:
def print_slices(moving_window_params):
    msg = ' - Slicing measurement:'
    for name in ('start', 'stop', 'step', 'window'):
        msg += ' %s = %.1fs' % (name, moving_window_params[name]) 
    print(msg, flush=True)
    num_slices = len(bext.moving_window_startstop(**moving_window_params))
    print('   Number of slices %d' % num_slices, flush=True)

In [ ]:
step = 1
params = {}
for window in (5, 30):
    moving_window_params = dict(start=0, stop=dsc.time_max, step=step, window=window)
    print_slices(moving_window_params)
    
    ds_slices = bext.moving_window_chunks(dsc, start=0, stop=600*60, step=step, window=window)
    for meth in ['em', 'll', 'hist']:
        print('    >>> Fitting method %s' % meth, flush=True)
        p = kinetics_fit(ds_slices, params_fixed, method=meth)
        p['kinetics'] = p.p2_amplitude
        slope, intercept, r_value, p_value, std_err = linregress(p.tstart, p.kinetics)
        y_model = p.tstart*slope + intercept
        p['kinetics_linregress'] = y_model
        params[meth, window, step] = p

Number of bursts


In [ ]:
moving_window_params

In [ ]:
df = bext.moving_window_dataframe(**moving_window_params)
df['size_mean'] = [di.nt_.mean() for di in ds_slices]
df['size_max'] = [di.nt_.max() for di in ds_slices]
df['num_bursts'] = [di.num_bursts[0] for di in ds_slices]
df['burst_width'] = [di.mburst_.width.mean()*di.clk_p*1e3 for di in ds_slices]

In [ ]:
labels = ('num_bursts', 'burst_width')
fig, axes = plt.subplots(len(labels), 1, figsize=(12, 3*len(labels)))

for ax, label in zip(axes, labels):
    ax.plot(label, data=df)
    ax.legend(loc='best')
    #ax.set_ylim(0)

In [ ]:
x, nb = df['tstart'], df['num_bursts']
slope, intercept, r_value, p_value, std_err = linregress(x, nb)
y_model = x*slope + intercept

In [ ]:
plt.plot(x, nb)
plt.plot(x, y_model)
print("Number of bursts: %.1f MEAN, %.1f VAR, %.3f VAR/MEAN" % 
      (nb.mean(), nb.var(), nb.var()/nb.mean()))

In [ ]:
nb_corr = (nb - y_model) + nb.mean()
plt.plot(nb_corr)
plt.plot(x, np.repeat(nb.mean(), x.size))
print("Number of bursts: %.1f MEAN, %.1f VAR, %.3f VAR/MEAN" % 
      (nb_corr.mean(), nb_corr.var(), nb_corr.var()/nb_corr.mean()))

In [ ]:
df['num_bursts_detrend'] = nb_corr
df['num_bursts_linregress'] = y_model

In [ ]:
out_fname = 'results/%s_burst_data_vs_time__window%ds_step%ds.txt' % (
    Path(filename).stem, moving_window_params['window'], moving_window_params['step'])
out_fname

In [ ]:
df.to_csv(out_fname)

Population fraction


In [ ]:
methods = ('em', 'll', 'hist')

In [ ]:
for meth in methods:
    plt.figure(figsize=(14, 3))
    plt.plot(params['em', 5, 1].index, params['em', 5, 1].kinetics, 'h', color='gray', alpha=0.2)
    plt.plot(params['em', 30, 1].index, params['em', 30, 1].kinetics, 'h', alpha=0.3)

In [ ]:
step = 1
for window in (5, 30):
    for meth in methods:
        out_fname = ('results/' + Path(filename).stem + 
                     '_%sfit_ampl_only__window%ds_step%ds.txt' % (meth, window, step))
        print('- Saving: ', out_fname)
        params[meth, window, step].to_csv(out_fname)

In [ ]:
d

In [ ]:


In [ ]: