This analysis is largely inspired by the following paper: Monnier, N. (2012). Bayesian Approach to MSD-Based Analysis of Particle Motion in Live Cells. Biophysical Journal.
The idea is to classify particle motion into different biophysical models: free diffusion, confined motion, directed motion, and so forth.
The input of the analysis is a set of MSD curves from several particles (acquired under the same condition), and the output is a probability for each candidate model.
For more details, the paper is available here: http://www.cell.com/biophysj/abstract/S0006-3495(12)00718-7
For a thorough introduction to Bayesian statistics, I strongly encourage you to read the excellent book Bayesian Methods for Hackers.
TODO: introduce the theory
Monnier, N. (2012). Bayesian Approach to MSD-Based Analysis of Particle Motion in Live Cells. Biophysical Journal
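Until that TODO is filled in properly, here is a minimal sketch of the logic (my paraphrase, not the paper's exact notation). Each candidate model $M_k$ predicts a mean MSD curve $f_k(\tau; \theta_k)$, and the observed mean curve $\bar{y}(\tau)$ is assumed Gaussian around that prediction, with covariance $\Sigma$ estimated from the spread of the individual curves:

$$P(\bar{y} \mid \theta_k, M_k) = \mathcal{N}\!\left(\bar{y};\, f_k(\tau; \theta_k),\, \Sigma\right)$$

The probability of each model is then obtained by marginalizing over its parameters and normalizing over the set of candidate models:

$$P(M_k \mid \bar{y}) = \frac{P(\bar{y} \mid M_k)\, P(M_k)}{\sum_j P(\bar{y} \mid M_j)\, P(M_j)}, \qquad P(\bar{y} \mid M_k) = \int P(\bar{y} \mid \theta_k, M_k)\, P(\theta_k \mid M_k)\, d\theta_k$$

With flat priors over models, the model with the largest marginal likelihood wins, and the integral automatically penalizes models with more free parameters.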
In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from scipy import io
from scipy import optimize
import pymc3 as pm
import theano
import theano.tensor as t
import matplotlib.pyplot as plt
In [2]:
# Chromosome data: MSD curves indexed by time delay
mat = io.loadmat('chromosomes.mat')
msds = mat['MSD_curves_chromosomes']
msds = pd.DataFrame(msds)
msds["delay"] = mat['timelags'].T[0]
msds.set_index("delay", drop=True, inplace=True)
msds.head()
Out[2]:
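If you do not have the chromosomes.mat file, a hypothetical stand-in can be generated from noisy purely diffusive curves (everything below, including the chosen diffusion coefficient and noise level, is made up for illustration):

# Synthetic substitute for the real data: noisy 3D free-diffusion MSD curves
n_particles = 50
delays = np.linspace(0.1, 10, 50)   # time lags in seconds
D_true = 0.05                       # arbitrary diffusion coefficient (um^2/s)
ideal = 6 * D_true * delays         # 3D free diffusion: MSD = 6 D tau
noise = np.random.normal(scale=0.1 * ideal[:, None] + 0.01,
                         size=(len(delays), n_particles))
msds = pd.DataFrame(ideal[:, None] + noise)
msds["delay"] = delays
msds.set_index("delay", drop=True, inplace=True)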
Display all the MSD curves
In [3]:
fig, ax = plt.subplots(figsize=(10, 8))
msds.plot(ax=ax, legend=False)
ax.set_xlabel('Delay (s)')
ax.set_ylabel(r'MSD ($\mu m^2$)')
Out[3]:
Display the average MSD (with std and sem)
In [4]:
msd_mean = msds.mean(axis=1)
msd_std = msds.std(axis=1)
msd_sem = msds.sem(axis=1)
fig, ax = plt.subplots(figsize=(10, 8))
msd_mean.plot(ax=ax, lw=2)
# std
ax.fill_between(msd_mean.index, msd_mean, msd_mean + msd_std, alpha=0.1)
ax.fill_between(msd_mean.index, msd_mean, msd_mean - msd_std, alpha=0.1)
# sem
ax.fill_between(msd_mean.index, msd_mean, msd_mean + msd_sem, alpha=0.2)
ax.fill_between(msd_mean.index, msd_mean, msd_mean - msd_sem, alpha=0.2)
ax.set_xlabel('Delay (s)')
ax.set_ylabel(r'MSD ($\mu m^2$)')
Out[4]:
MATLAB code is available here: http://msd-bayes.org/
In [40]:
# Get the average MSD
msd_mean = msds.mean(axis=1)

# Difference between each individual curve and the mean curve
errors = msds.sub(msd_mean, axis=0)

# Raw covariance matrix between time lags (each row of `errors` is one lag)
error_cov_raw = np.cov(errors)

# Regularize covariance matrix (TODO)
error_cov = error_cov_raw.copy()

# Covariance of the mean curve: divide by the number of curves in the average
n_curves = errors.shape[1]
error_cov_raw /= n_curves
error_cov /= n_curves
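The regularization step is left as a TODO above. One simple option (my own choice here, not necessarily the scheme used in the paper or in the MATLAB code) is to shrink the estimate towards its diagonal, which keeps the matrix well conditioned when the number of curves is small:

# Hypothetical regularization: shrink the covariance of the mean towards its
# diagonal (the shrinkage strength is arbitrary)
shrinkage = 0.1
error_cov = ((1 - shrinkage) * error_cov_raw
             + shrinkage * np.diag(np.diag(error_cov_raw)))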
Display the covariance matrix.
In [41]:
plt.figure(figsize=(8, 8))
plt.imshow(error_cov)
Out[41]:
In [11]:
# Purely diffusive model: MSD(tau) = 6 D tau (3D free diffusion)
def msd_model(tau, D_coeff):
    return 6 * D_coeff * tau

msd_observed = msd_mean.copy()
tau = msd_mean.index.values
popt, pcov = optimize.curve_fit(msd_model, tau, msd_observed)
errors = np.sqrt(np.diag(pcov))
print("Estimate for D coeff is {:.2f} with standard error = {:.5f}".format(popt[0], errors[0]))
In [101]:
# Constant model: MSD(tau) = 6 sigma_e^2 (pure localization error)
def msd_model(tau, sigma_e):
    return 6 * sigma_e ** 2

msd_observed = msd_mean.copy()
tau = msd_mean.index.values
popt, pcov = optimize.curve_fit(msd_model, tau, msd_observed)
errors = np.sqrt(np.diag(pcov))
print("Estimate for sigma_e is {:.2f} with standard error = {:.5f}".format(popt[0], errors[0]))
In [106]:
# Diffusive + error model: MSD(tau) = 6 D tau + 6 sigma_e^2
def msd_model(tau, D_coeff, sigma_e):
    return 6 * D_coeff * tau + 6 * sigma_e ** 2

msd_observed = msd_mean.copy()
tau = msd_mean.index.values
popt, pcov = optimize.curve_fit(msd_model, tau, msd_observed)
errors = np.sqrt(np.diag(pcov))
print("Estimate for D coeff is {:.2f} with standard error = {:.5f}".format(popt[0], errors[0]))
print("Estimate for sigma_e is {:.2f} with standard error = {:.5f}".format(popt[1], errors[1]))
See https://pymc-devs.github.io/pymc3/getting_started/#a-motivating-example-linear-regression for an introduction to PyMC3.
In [13]:
# Purely diffusive model
msd_observed = msd_mean.copy()
tau = msd_mean.index.values

with pm.Model() as model:
    D_coeff = pm.Uniform("D_coeff", lower=0, upper=1000)
    msd_model = 6 * D_coeff * tau
    # no observation noise is specified, so pm.Normal defaults to sd=1
    observation = pm.Normal("observation", mu=msd_model, observed=msd_observed)

    step = pm.NUTS()
    trace = pm.sample(1000, step)

print("\nEstimate for D coeff is {:.2f} +/- {:.2f} (posterior mean and std)".format(
    trace["D_coeff"].mean(), trace["D_coeff"].std()))
pm.traceplot(trace)
Out[13]:
In [10]:
# Constant model
msd_observed = msd_mean.copy()

with pm.Model() as model:
    sigma_e = pm.Uniform("sigma_e", lower=0, upper=10)
    msd_model = 6 * sigma_e ** 2
    observation = pm.Normal("observation", mu=msd_model, observed=msd_observed)

    step = pm.NUTS()
    trace = pm.sample(1000, step)

print("\nEstimate for sigma_e is {:.2f} +/- {:.2f} (posterior mean and std)".format(
    trace["sigma_e"].mean(), trace["sigma_e"].std()))
pm.traceplot(trace)
Out[10]:
In [118]:
# Diffusive + error model
msd_observed = msd_mean.copy()
tau = msd_mean.index.values

with pm.Model() as model:
    D_coeff = pm.Uniform("D_coeff", lower=0, upper=1)
    sigma_e = pm.Uniform("sigma_e", lower=0, upper=10)
    msd_model = 6 * D_coeff * tau + 6 * sigma_e ** 2
    observation = pm.Normal("observation", mu=msd_model, observed=msd_observed)

    step = pm.NUTS()
    trace = pm.sample(1000, step)

print("\nEstimate for D coeff is {:.2f} +/- {:.2f} (posterior mean and std)".format(
    trace["D_coeff"].mean(), trace["D_coeff"].std()))
print("Estimate for sigma_e is {:.2f} +/- {:.2f} (posterior mean and std)".format(
    trace["sigma_e"].mean(), trace["sigma_e"].std()))
pm.traceplot(trace)
Out[118]:
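The fits above give parameter estimates but not the model probabilities that are the end goal of the paper's approach. A very rough sketch of that last step (my own shortcut: it plugs in least-squares point estimates instead of marginalizing over the parameters, reuses the models dict from the BIC sketch above, and uses the error_cov matrix of the mean curve computed earlier):

from scipy import stats

# Rough model probabilities: Gaussian likelihood of the mean MSD curve under
# each fitted model, using the covariance of the mean curve as the error model.
tau = msd_mean.index.values
log_liks = {}
for name, (f, k) in models.items():
    popt, _ = optimize.curve_fit(f, tau, msd_mean.values)
    log_liks[name] = stats.multivariate_normal.logpdf(
        msd_mean.values, mean=f(tau, *popt), cov=error_cov, allow_singular=True)

# Normalize to probabilities assuming an equal prior weight on each model
log_l = np.array(list(log_liks.values()))
probs = np.exp(log_l - log_l.max())
probs /= probs.sum()
for name, p in zip(log_liks, probs):
    print("P({}) ~ {:.3f}".format(name, p))

A proper treatment would integrate over the parameter posteriors (for instance using the traces sampled above) to obtain the marginal likelihood of each model, which also penalizes extra parameters.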
The remaining cells are a side exercise adapted from Bayesian Methods for Hackers: the daily text-message count example, first with a single switchpoint, then with two.
In [14]:
%matplotlib qt
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from scipy import io
from scipy import optimize
import pymc3 as pm
import theano
import theano.tensor as t
import matplotlib.pyplot as plt
In [15]:
count_data = np.loadtxt("txtdata.csv")
plt.bar(np.arange(len(count_data)), count_data, color="#348ABD", edgecolor="none")
Out[15]:
In [17]:
count_data = np.loadtxt("txtdata.csv")
alpha = 1.0 / count_data.mean()  # recall count_data holds our daily text-message counts

with pm.Model() as model:
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    tau = pm.DiscreteUniform("tau", lower=0, upper=len(count_data))

    days = np.arange(len(count_data))
    # the rate is lambda_1 up to the switchpoint tau and lambda_2 afterwards
    lambda_ = pm.switch(tau >= days, lambda_1, lambda_2)
    observation = pm.Poisson("observation", mu=lambda_, observed=count_data)

    step = pm.Metropolis()
    trace = pm.sample(1000, step)
print()
print("tau", trace['tau'][-1])
print("lambda_1", trace['lambda_1'][-1])
print("lambda_2", trace['lambda_2'][-1])
In [21]:
pm.traceplot(trace)
Out[21]:
In [20]:
tau = trace['tau'][-1]
lambda_1 = trace['lambda_1'][-1]
lambda_2 = trace['lambda_2'][-1]
mcount = np.zeros(count_data.shape)
mcount[:tau] = lambda_1
mcount[tau:] = lambda_2
plt.figure(figsize=(10, 6))
plt.bar(np.arange(len(count_data)), count_data, color="#348ABD", edgecolor="none")
plt.plot(mcount, lw=4, color="#E24A33")
Out[20]:
In [20]:
count_data = np.loadtxt("txtdata.csv")

@theano.compile.ops.as_op(itypes=[t.lscalar, t.lscalar, t.dscalar, t.dscalar, t.dscalar],
                          otypes=[t.dvector])
def lambda_(tau1, tau2, lambda_1, lambda_2, lambda_3):
    out = np.zeros(len(count_data))
    out[:tau1] = lambda_1        # rate before tau1 is lambda_1
    out[tau1:tau2] = lambda_2    # rate between tau1 and tau2 is lambda_2
    out[tau2:] = lambda_3        # rate after (and including) tau2 is lambda_3
    return out

alpha = 1.0 / count_data.mean()  # recall count_data holds our daily text-message counts

with pm.Model() as model:
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)
    lambda_3 = pm.Exponential("lambda_3", alpha)
    tau1 = pm.DiscreteUniform("tau1", lower=0, upper=len(count_data))
    tau2 = pm.DiscreteUniform("tau2", lower=0, upper=len(count_data))

    observation = pm.Poisson("observation",
                             mu=lambda_(tau1, tau2, lambda_1, lambda_2, lambda_3),
                             observed=count_data)

    step = pm.Metropolis()
    trace = pm.sample(500, step)
print()
print("tau1", trace['tau1'].mean())
print("tau2", trace['tau2'].mean())
print("lambda_1", trace['lambda_1'].mean())
print("lambda_2", trace['lambda_2'].mean())
print("lambda_3", trace['lambda_3'].mean())
In [21]:
pm.traceplot(trace)
Out[21]:
In [22]:
# posterior means (switchpoints rounded to integer days so they can be used as indices)
tau1 = int(round(trace['tau1'].mean()))
tau2 = int(round(trace['tau2'].mean()))
lambda_1 = trace['lambda_1'].mean()
lambda_2 = trace['lambda_2'].mean()
lambda_3 = trace['lambda_3'].mean()

mcount = np.zeros(count_data.shape)
mcount[:tau1] = lambda_1
mcount[tau1:tau2] = lambda_2
mcount[tau2:] = lambda_3
plt.figure(figsize=(10, 6))
plt.bar(np.arange(len(count_data)), count_data, color="#348ABD", edgecolor="none")
plt.plot(mcount, lw=4, color="#E24A33")
Out[22]:
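Plugging in posterior means gives a single step function. Averaging the implied rate over the whole trace instead gives the posterior expected count per day, which also reflects the uncertainty in the switchpoints. A rough sketch (my own addition):

# Posterior expected rate per day, averaged over all posterior samples
days = np.arange(len(count_data))
expected = np.zeros(len(count_data))
for t1, t2, l1, l2, l3 in zip(trace['tau1'], trace['tau2'],
                              trace['lambda_1'], trace['lambda_2'], trace['lambda_3']):
    expected += np.where(days < t1, l1, np.where(days < t2, l2, l3))
expected /= len(trace['tau1'])

plt.figure(figsize=(10, 6))
plt.bar(days, count_data, color="#348ABD", edgecolor="none")
plt.plot(expected, lw=4, color="#E24A33")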