This Notebook contains the Python code source used to compute and analyse uncertainty propagations in peptide-protein binding experiments described in the Protocol paper entitled :
"Accurate protein-peptide titration experiments by Nuclear Magnetic Resonance using low-volume samples"
Published in the Method in Molecular Biology serie (Springer).
In [40]:
%pylab
In [15]:
"""
equilibrium.py Version 5.1
B Kieffer
Copyright (c) 2010 IGBMC. All rights reserved.
Tool to fit NMR Chemical Shift Perturbation data to extract Kd values
"""
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize, integrate
from random import gauss
############################################################################
class equi():
def __init__(self):
self.AR0 = np.array([]) # Array of total receptor concentrations
self.AL0 = np.array([]) # Array of total ligand concentrations
self.YEXP = np.array([]) # Array of experimental values (such as chemical shifts)
self.YCAL = np.array([]) # Array of calculated values, from the model
self.noise = 0.0 # Noise estimate on the experimental data
self.npoints = 0 # Number of titration points
#-----------------------------------------------------
def calc_bound1s(self,kd):
""" Calculate bound fractions for a one site system
kd is the dissociation constant
Equation : x^2 - x [1 + L0/R0 + Kd/R0] + L0/R0 = 0
"""
y = self.AL0/self.AR0
b = 1+y+(kd/self.AR0)
z = np.sqrt(b**2 - 4*y)
X = (b - z)/2
return X
#-----------------------------------------------------
def load(self,fname):
""" Load experimental data with N column tabulated format
R0 L0 VALUE
if more than 3 columns are provided, YEXP is the mean value
"""
file = open(fname,'r')
all_lines=file.readlines()
file.close()
# Detect the number of fields in the line
fields = []
firstline = all_lines[0]
fields = firstline.split()
nf = len(fields)
print " %3d fields in file %s " % (nf, fname)
print "First line : %s \n" % firstline
temp = []
for line in all_lines :
if line[0] != "#": # Exclude commented lines
tf = []
for i in range(0,nf):
tf.append(float(line.split()[i]))
temp.append(tf)
atemp = np.array(temp)
self.AR0 = atemp.T[0]
self.AL0 = atemp.T[1]
self.YEXP = atemp.T[2]
if nf > 3 : # If more than one column is found, computes the average
for i in range(2,nf):
self.YEXP += atemp.T[i]
self.YEXP = self.YEXP/(nf-2)
print "%d Values read " % len(self.AR0)
self.npoints = len(self.AR0)
return True
#-----------------------------------------------------
def model0(self,PAR,CNST=[]) :
""" model 0 : 2 parameters fit of high affinity site using
ppmax and kd as parameters (in the list PAR)
"""
kd = PAR[0]
ppmax = PAR[1]
self.YCAL = ppmax*self.calc_bound1s(kd)
dist = np.subtract(self.YEXP,self.YCAL)
return dist
#-----------------------------------------------------
def fit(self,fun,PAR,CNST=[]) :
""" Fit the target function
PAR is the vector of parameters to be adjusted
CNST is a vector of constant values
nrepeat is used to preform Monte-Carlo simulations (if above 1)
"""
PAROPT,n = optimize.leastsq(fun,PAR,args=(CNST))
NP=len(self.YEXP)
dist = np.subtract(self.YEXP,self.YCAL)
self.noise = np.linalg.norm(dist)/np.sqrt(NP)
return PAROPT
#-----------------------------------------------------
def add_noise(self,X,Xerr):
""" Add a gaussian noise to the array X
Returns a numpy array containing X+DX
"""
if len(X) <> 0 and len(X) == len(Xerr):
XD = []
for abserr in Xerr:
XD.append(abserr*gauss(0,1))
DX = np.array(XD)
return X+DX
else:
return None
#-----------------------------------------------------
def monte_carlo(self,data_err_abs,AR0_err_abs,AL0_err_abs,ns=1000,PARINI = [0.5, 0.25]):
""" Compute the statistical error produced by random uncertainties on Ligand concentrations and chemical shifts
AR0_err_abs and AL0_err_abs are array containing absolute errors on protein and ligand concentrations.
data_err_abs is an array that contains absolute uncertainties on chemical shifts
"""
# Save initial values of protein and ligand concentrations and chemical shifts
AR0_save = self.AR0
AL0_save = self.AL0
YCAL_save = self.YCAL
# Initialize empty arrays to store results
store_kd = []
store_ppm = []
store_AL0 = []
store_YEXP = []
for i in range(ns):
self.AR0=add_noise_logn(AR0_save,AR0_err_abs)
self.AL0=add_noise_logn(AL0_save,AL0_err_abs)
self.YEXP=add_noise(YCAL_save,data_err_abs)
PAROPT = self.fit(self.model0,PARINI)
store_kd.append(PAROPT[0])
store_ppm.append(PAROPT[1])
store_AL0.append(self.AL0)
store_YEXP.append(self.YEXP)
store_kd=np.array(store_kd)
store_ppm=np.array(store_ppm)
return (store_kd,store_ppm,store_AL0,store_YEXP)
########################### END OF EQUI OBJECT #################################
def add_noise(X,Xerr):
""" Add a gaussian noise to the array X
Returns a numpy array containing X+DX """
if len(X) <> 0 and len(X) == len(Xerr):
XD = []
for abserr in Xerr:
XD.append(abserr*gauss(0,1))
DX = np.array(XD)
return X+DX
else:
return None
def add_noise_logn(X,Xerr):
""" Add noise to the array X according a Log-normal distribution
Returns a numpy array containing X+DX """
if len(X) <> 0 and len(X) == len(Xerr):
XD = []
j=0
for abserr in Xerr:
if (abserr == 0 or X[j] == 0):
tmp=[X[j]]
else:
mu = np.log(X[j])
sigma = abserr/X[j]
tmp = np.random.lognormal(mu, sigma, 1)
XD.append(tmp[0])
j+=1
return np.array(XD)
else:
return None
In [41]:
a = equi() # Create an object for data fitting
# Load several data files and average
a.load("PK63.txt")
(exp_x, exp_y)= (a.AL0,a.YEXP)
PARINI = [10, 0.25] # Initial parameters for the fit
# Perform a first fit
PAROPT= a.fit(a.model0,PARINI)
# Estimate the error using Monte-Carlo Simulation
Y_err_abs = np.ones(a.npoints)*a.noise
AR0_err_abs = np.ones(a.npoints)*7 # Absolute errors on Receptor concentrations
AL0_err_abs = np.array([0.0, 0.6, 0.9, 1.7, 4.3, 8.7, 16.9, 33.3, 44.4, 82.7, 135.4]) # Absolute errors on Ligand concentrations
kd_val,ppm_val,x_val,y_val = a.monte_carlo(Y_err_abs,AR0_err_abs,AL0_err_abs,ns = 200)
semilogx(x_val,y_val,'.',color="0.8")
# Plot experimental values
p1 = semilogx(exp_x,exp_y,'or')
# Plot optimal curve (from PAROPT parameters)
b = equi() # Create a second object to plot the model
R0 = a.AR0[0] # get the experimental concentration of protein
b.AL0 = np.arange(0.0,4000.0,5.0)
NP = len(b.AL0)
b.AR0 = np.ones(NP)*R0
b.YEXP = np.zeros(NP)
d = b.model0(PAROPT)
p3 = semilogx(b.AL0,b.YCAL,'-k')
tick_params(labelsize='16')
axis([4,4000,-0.03,0.3])
print "Kd: %8.2f +/- %8.2f" % (kd_val.mean(),kd_val.std())
print "ppm: %8.3f +/- %8.4f " % (ppm_val.mean(),ppm_val.std())
show()
In [50]:
PARINI = [10, 0.25] # Initial parameters for the fit
rel_error = [0.6,0.8, 1, 1.2, 1.4]
colors = ['y.','r.','k.','g.','b.']
for i in range(len(rel_error)):
a = equi()
# Load several data files and average
a.load("pk63.txt")
a.AL0 = a.AL0*rel_error[i] # Introduce systematic error
PAROPT= a.fit(a.model0,PARINI) # Perform a first fit
Y_err_abs = np.ones(a.npoints)*a.noise
AR0_err_abs = np.ones(a.npoints)*7 # Absolute errors on Receptor concentrations
AL0_err_abs = np.ones(a.npoints)*a.AL0*0.1 # 10 % statistical error
kd_val,ppm_val,x_val,y_val = a.monte_carlo(Y_err_abs,AR0_err_abs,AL0_err_abs,ns = 800)
plot(kd_val,ppm_val,colors[i])
print "error computed for systematic error of %3.1f " % rel_error[i]
print "Kd: %8.2f +/- %8.2f" % (kd_val.mean(),kd_val.std())
print "ppm: %8.3f +/- %8.4f " % (ppm_val.mean(),ppm_val.std())
print " "
axis([0.0,120.,0.23,0.28])
show()
In [58]:
PARINI = [10, 0.25] # Initial parameters for the fit
rel_error = [50.,40., 30., 20., 10.]
colors = ['y.','r.','g.','b.','k.']
for i in range(len(rel_error)):
a = equi()
# Load several data files and average
a.load("pk63.txt")
PAROPT= a.fit(a.model0,PARINI) # Perform a first fit
Y_err_abs = np.ones(a.npoints)*a.noise
AR0_err_abs = np.ones(a.npoints)*7 # Absolute errors on Receptor concentrations
AL0_err_abs = np.ones(a.npoints)*rel_error[i]*a.AL0/100
kd_val,ppm_val,x_val,y_val = a.monte_carlo(Y_err_abs,AR0_err_abs,AL0_err_abs,ns = 1000)
plot(kd_val,ppm_val,colors[i])
print "error computed for statistical error of %d " % rel_error[i]
print "Kd: %8.2f +/- %8.2f" % (kd_val.mean(),kd_val.std())
print "ppm: %8.3f +/- %8.4f " % (ppm_val.mean(),ppm_val.std())
print " "
axis([0.0,120.,0.23,0.28])
show()
In [ ]: