In this notebook we play with data generated in the 'Just Modeling - Two Component Binding' notebook, and see how our bayesian models do at reproducing the Kd.
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pymc
import seaborn as sns
%pylab inline
We use the same setup here as we did in 'Just Modeling - Two Component Binding'.
Experimentally we won't know the Kd, but we know the P, PL, and L concentrations.
In [2]:
Kd = 2e-9 # M
In [3]:
Ptot = 1e-9 # M
In [4]:
Ltot = 20.0e-6 / np.array([10**(float(i)/2.0) for i in range(12)]) # M
In [5]:
def two_component_binding(Kd, Ptot, Ltot):
"""
Parameters
----------
Kd : float
Dissociation constant
Ptot : float
Total protein concentration
Ltot : float
Total ligand concentration
Returns
-------
P : float
Free protein concentration
L : float
Free ligand concentration
PL : float
Complex concentration
"""
PL = 0.5 * ((Ptot + Ltot + Kd) - np.sqrt((Ptot + Ltot + Kd)**2 - 4*Ptot*Ltot)) # complex concentration (uM)
P = Ptot - PL; # free protein concentration in sample cell after n injections (uM)
L = Ltot - PL; # free ligand concentration in sample cell after n injections (uM)
return [P, L, PL]
[L, P, PL] = two_component_binding(Kd, Ptot, Ltot)
In [6]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Ltot,PL, 'o')
plt.xlabel('$[L]_{tot}$ / M')
plt.ylabel('$[PL]$ / M')
plt.ylim(0,1.3e-9)
plt.axhline(Ptot,color='0.75',linestyle='--',label='$[P]_{tot}$')
plt.legend();
In [7]:
# Making max 400 relative fluorescence units, and scaling all of PL to that
npoints = len(Ltot)
sigma = 10.0 # size of noise
F_i = (400/1e-9)*PL + sigma * np.random.randn(npoints)
Pstated = np.ones([npoints],np.float64)*Ptot
Lstated = Ltot
In [8]:
# Uncertainties in protein and ligand concentrations.
dPstated = 0.10 * Pstated # protein concentration uncertainty
dLstated = 0.08 * Lstated # ligand concentraiton uncertainty (due to gravimetric preparation and HP D300 dispensing)
In [9]:
# Define our two-component binding system again.
def two_component_binding(DeltaG, P, L):
Kd = np.exp(DeltaG)
PL = 0.5 * ((P + L + Kd) - np.sqrt((P + L + Kd)**2 - 4*P*L)); # complex concentration (M)
P = P - PL; # free protein concentration in sample cell after n injections (M)
L = L - PL; # free ligand concentration in sample cell after n injections (M)
return [P, L, PL]
In [10]:
# Create a pymc model
def make_model(Pstated, dPstated, Lstated, dLstated, Fobs_i):
N = len(Lstated)
# Prior on binding free energies.
DeltaG = pymc.Uniform('DeltaG', lower=-40, upper=+40, value=0.0) # binding free energy (kT), uniform over huge range
# Priors on true concentrations of protein and ligand.
Ptrue = pymc.Normal('Ptrue', mu=Pstated, tau=dPstated**(-2)) # protein concentration (M)
Ltrue = pymc.Normal('Ltrue', mu=Lstated, tau=dLstated**(-2)) # ligand concentration (M)
Ltrue_control = pymc.Normal('Ltrue_control', mu=Lstated, tau=dLstated**(-2)) # ligand concentration (M)
# Priors on fluorescence intensities of complexes (later divided by a factor of Pstated for scale).
Fmax = Fobs_i.max()
F_background = pymc.Uniform('F_background', lower=0.0, upper=Fmax) # background
F_PL = pymc.Uniform('F_PL', lower=0.0, upper=Fmax/min(Pstated.max(),Lstated.max())) # complex fluorescence
F_L = pymc.Uniform('F_L', lower=0.0, upper=Fmax/Lstated.max()) # ligand fluorescence
# Unknown experimental measurement error.
log_sigma = pymc.Uniform('log_sigma', lower=-10, upper=3, value=0.0)
@pymc.deterministic
def precision(log_sigma=log_sigma): # measurement precision
return np.exp(-2*log_sigma)
# Fluorescence model.
@pymc.deterministic
def Fmodel(F_background=F_background, F_PL=F_PL, F_L=F_L, Ptrue=Ptrue, Ltrue=Ltrue, DeltaG=DeltaG):
Fmodel_i = np.zeros([N])
for i in range(N):
[P, L, PL] = two_component_binding(DeltaG, Ptrue[i], Ltrue[i])
Fmodel_i[i] = (F_PL*PL + F_L*L) + F_background
return Fmodel_i
# Experimental error on fluorescence observations.
Fobs_model = pymc.Normal('Fobs_i', mu=Fmodel, tau=precision, size=[N], observed=True, value=Fobs_i) # observed data
# Construct dictionary of model variables.
pymc_model = { 'Ptrue' : Ptrue,
'Ltrue' : Ltrue,
'Ltrue_control' : Ltrue_control,
'log_sigma' : log_sigma,
'precision' : precision,
'F_PL' : F_PL,
'F_L' : F_L,
'F_background' : F_background,
'Fmodel_i' : Fmodel,
'Fobs_model' : Fobs_model,
'DeltaG' : DeltaG # binding free energy
}
return pymc_model
In [11]:
# Build model.
pymc_model = pymc.Model(make_model(Pstated, dPstated, Lstated, dLstated, F_i))
# Sample with MCMC
mcmc = pymc.MCMC(pymc_model, db='ram', name='Sampler', verbose=True)
mcmc.sample(iter=100000, burn=10000, thin=50, progress_bar=False)
In [12]:
# Plot trace of DeltaG.
rcParams['figure.figsize'] = [15, 3]
plot(mcmc.DeltaG.trace(), 'o');
xlabel('MCMC sample');
ylabel('$\Delta G$ / $k_B T$');
In [13]:
# Plot trace of true protein concentration.
rcParams['figure.figsize'] = [15, 3]
plot(mcmc.Ptrue.trace()*1e6, 'o');
xlabel('MCMC sample');
ylabel('$[P]_{tot}$ / $\mu$M');
In [14]:
# Plot trace of true protein concentration.
rcParams['figure.figsize'] = [15, 3]
plot(mcmc.Ltrue.trace()*1e6, 'o');
xlabel('MCMC sample');
ylabel('$[L]_{tot}$ ($\mu$M)');
print mcmc.Ltrue.trace().min()
In [15]:
# Plot histogram of DeltaG.
rcParams['figure.figsize'] = [15, 3]
hist(mcmc.DeltaG.trace()[-1000:], 40);
xlabel('$\Delta G$ / $k_B T$');
ylabel('$P(\Delta G)$');
In [16]:
# Plot trace of intrinsic fluorescence parameters.
rcParams['figure.figsize'] = [15, 3]
semilogy(mcmc.F_PL.trace(), 'o', mcmc.F_L.trace(), 'o', mcmc.F_background.trace(), 'o');
legend(['complex fluorescence', 'ligand fluorescence', 'background fluorescence']);
xlabel('MCMC sample');
ylabel('relative fluorescence intensity');
In [17]:
# Plot model fit.
rcParams['figure.figsize'] = [15, 3]
figure = pyplot.gcf() # get current figure
Fmodels = mcmc.Fmodel_i.trace()
clf();
hold(True)
for Fmodel in Fmodels:
semilogx(Lstated, Fmodel, 'k-')
semilogx(Lstated, F_i, 'ro')
hold(False)
xlabel('$[L]_{tot}$ / M');
ylabel('fluorescence units');
In [18]:
nlast = 500 # number of final samples to analyze
DeltaG = mcmc.DeltaG.trace()[-nlast:].mean()
dDeltaG = mcmc.DeltaG.trace()[-nlast:].std()
print "DeltaG: %.3f +- %.3f kT" % (DeltaG, dDeltaG)
In [19]:
Kd_calc = np.exp(mcmc.DeltaG.trace()[-nlast:]).mean()
dKd_calc = np.exp(mcmc.DeltaG.trace()[-nlast:]).std()
print "Kd = %.3f +- %.3f nM [true: %.3f nM]" % (Kd_calc/1e-9, dKd_calc/1e-9, Kd/1e-9)
In [20]:
relative_error = np.abs(Kd_calc-Kd) / np.abs(Kd)
print "Relative error in Kd is %.5f %%" % (relative_error * 100)
Let's make a 'better' set of data.
In [21]:
def make_two_component_binding(Kd, Ptot, Ltot):
PL = 0.5 * ((Ptot + Ltot + Kd) - np.sqrt((Ptot + Ltot + Kd)**2 - 4*Ptot*Ltot)) # complex concentration (uM)
P = Ptot - PL; # free protein concentration in sample cell after n injections (uM)
L = Ltot - PL; # free ligand concentration in sample cell after n injections (uM)
return [P, L, PL]
All we need to do make 'better' data is refine our Ligand range.
In [22]:
Lnew = 1.0e-7 / np.array([10**(float(i)/8.0) for i in range(24)])
In [23]:
[L, P, PL] = make_two_component_binding(2e-9,Ptot,Lnew)
print PL
In [24]:
# y will be complex concentration
# x will be total ligand concentration
plt.semilogx(Lnew,PL, 'o')
plt.xlabel('L')
plt.ylabel('PL')
plt.ylim(0,1.3e-9)
plt.axhline(Ptot,color='0.75',linestyle='--',label='[Ptot]')
plt.legend();
Great! Now let's see how pymc does.
In [25]:
# Making max 400 relative fluorescence units, and scaling all of PL to that
npoints = len(Lnew)
F_i = (400/1e-9)*PL + sigma * np.random.randn(npoints)
Pstated = np.ones([npoints],np.float64)*Ptot
Lstated = Lnew
dPstated = 0.10 * Pstated
dLstated = 0.08 * Lstated
In [26]:
# Build model.
pymc_model = pymc.Model(make_model(Pstated, dPstated, Lstated, dLstated, F_i))
# Sample with MCMC
mcmc = pymc.MCMC(pymc_model, db='ram', name='Sampler', verbose=True)
mcmc.sample(iter=100000, burn=10000, thin=50, progress_bar=False)
In [27]:
# Plot trace of true protein concentration.
rcParams['figure.figsize'] = [15, 3]
plot(mcmc.Ptrue.trace()*1e6, 'o');
xlabel('MCMC sample');
ylabel('$[P]_{tot}$ / $\mu$M');
In [28]:
# Plot trace of true protein concentration.
rcParams['figure.figsize'] = [15, 3]
plot(mcmc.Ltrue.trace()*1e6, 'o');
xlabel('MCMC sample');
ylabel('$[L]_{tot}$ ($\mu$M)');
print mcmc.Ltrue.trace().min()
In [29]:
# Plot trace of DeltaG.
rcParams['figure.figsize'] = [15, 3]
plot(mcmc.DeltaG.trace(), 'o');
xlabel('MCMC sample');
ylabel('$\Delta G$ ($k_B T$)');
In [30]:
# Plot histogram of DeltaG.
rcParams['figure.figsize'] = [15, 3]
hist(mcmc.DeltaG.trace(), 40);
xlabel('$\Delta G$ ($k_B T$)');
ylabel('$P(\Delta G)$');
In [31]:
# Plot trace of intrinsic fluorescence parameters.
rcParams['figure.figsize'] = [15, 3]
semilogy(mcmc.F_PL.trace(), 'o', mcmc.F_L.trace(), 'o', mcmc.F_background.trace(), 'o');
legend(['complex fluorescence', 'ligand fluorescence', 'background fluorescence']);
xlabel('MCMC sample');
ylabel('relative fluorescence intensity');
In [32]:
# Plot model fit.
rcParams['figure.figsize'] = [15, 3]
figure = pyplot.gcf() # get current figure
Fmodels = mcmc.Fmodel_i.trace()
clf();
hold(True)
for Fmodel in Fmodels:
semilogx(Lstated, Fmodel, 'k-')
semilogx(Lstated, F_i, 'ro')
hold(False)
xlabel('$[L]_s$ (M)');
ylabel('fluorescence units');
In [33]:
nlast = 500 # number of final samples to analyze
DeltaG = mcmc.DeltaG.trace()[-nlast:].mean()
dDeltaG = mcmc.DeltaG.trace()[-nlast:].std()
print "DeltaG: %.3f +- %.3f kT" % (DeltaG, dDeltaG)
Kd_calc = np.exp(mcmc.DeltaG.trace()[-nlast:]).mean()
dKd_calc = np.exp(mcmc.DeltaG.trace()[-nlast:]).std()
print "Kd = %.3f +- %.3f nM [true: %.3f nM]" % (Kd_calc/1e-9, dKd_calc/1e-9, Kd/1e-9)
relative_error = np.abs(Kd_calc-Kd) / np.abs(Kd)
print "Relative error in Kd is %.5f %%" % (relative_error * 100)
In [ ]: