peptide-protein dissociation constant measurement using NMR Chemical Shift Perturbation data

This Notebook contains the Python code source used to compute and analyse uncertainty propagations in peptide-protein binding experiments described in the Protocol paper entitled :

"Accurate protein-peptide titration experiments by Nuclear Magnetic Resonance using low-volume samples"

Published in the Method in Molecular Biology serie (Springer).


In [40]:
%pylab


Using matplotlib backend: MacOSX
Populating the interactive namespace from numpy and matplotlib

In [15]:
"""
equilibrium.py   Version 5.1

B Kieffer
Copyright (c) 2010 IGBMC. All rights reserved.

Tool to fit NMR Chemical Shift Perturbation data to extract Kd values
""" 

import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize, integrate
from random import gauss



############################################################################
class equi():
   
    def __init__(self):
        self.AR0 = np.array([])  # Array of total receptor concentrations
        self.AL0 = np.array([])  # Array of total ligand concentrations
        self.YEXP = np.array([]) # Array of experimental values (such as chemical shifts)
        self.YCAL = np.array([]) # Array of calculated values, from the model
        self.noise = 0.0         # Noise estimate on the experimental data
        self.npoints = 0         # Number of titration points 

#-----------------------------------------------------      

    def calc_bound1s(self,kd):
        """ Calculate bound fractions for a one site system
            kd is the dissociation constant
            Equation : x^2 - x [1 + L0/R0 + Kd/R0] + L0/R0 = 0
        """
      
        y = self.AL0/self.AR0
        b = 1+y+(kd/self.AR0)
        z = np.sqrt(b**2 - 4*y)
        X = (b - z)/2

        return X

#-----------------------------------------------------
    def load(self,fname):
        """ Load experimental data with N column tabulated format
            R0 L0 VALUE
            if more than 3 columns are provided, YEXP is the mean value
        """
        file = open(fname,'r')
        all_lines=file.readlines()
        file.close()
      
        # Detect the number of fields in the line
        fields = []
        firstline = all_lines[0]
        fields = firstline.split()
        nf = len(fields)
        print " %3d fields in file %s " % (nf, fname)
        print "First line : %s \n" % firstline

        temp = []
        for line in all_lines :
            if line[0] != "#": # Exclude commented lines
                tf = []
                for i in range(0,nf): 
                    tf.append(float(line.split()[i]))
                temp.append(tf)
        atemp = np.array(temp)
        self.AR0 = atemp.T[0]
        self.AL0 = atemp.T[1]
        self.YEXP = atemp.T[2]
        
        if nf > 3 : # If more than one column is found, computes the average
            for i in range(2,nf):
                self.YEXP += atemp.T[i]
            self.YEXP = self.YEXP/(nf-2)
            
        print "%d Values read " % len(self.AR0)
        self.npoints = len(self.AR0)

        return True
      
#-----------------------------------------------------
    def model0(self,PAR,CNST=[]) :
        """ model 0 : 2 parameters fit of high affinity site using 
            ppmax and kd as parameters (in the list PAR)
        """
      
        kd = PAR[0]
        ppmax = PAR[1]
      
        self.YCAL = ppmax*self.calc_bound1s(kd)
        dist = np.subtract(self.YEXP,self.YCAL)
        
        return dist
            
#-----------------------------------------------------
    def fit(self,fun,PAR,CNST=[]) :
        """ Fit the target function 
            PAR is the vector of parameters to be adjusted
            CNST is a vector of constant values
            nrepeat is used to preform Monte-Carlo simulations (if above 1)
        """
      
        PAROPT,n = optimize.leastsq(fun,PAR,args=(CNST))
        NP=len(self.YEXP)
        dist = np.subtract(self.YEXP,self.YCAL)
        self.noise = np.linalg.norm(dist)/np.sqrt(NP)
      
        return PAROPT

#-----------------------------------------------------
    def add_noise(self,X,Xerr):
        """ Add a gaussian noise to the array X 
            Returns a numpy array containing X+DX
        """
        if len(X) <> 0 and len(X) == len(Xerr):
            XD = []
            for abserr in Xerr:
                XD.append(abserr*gauss(0,1))
            DX = np.array(XD)
            return X+DX
        else:
            return None

#-----------------------------------------------------    
    def monte_carlo(self,data_err_abs,AR0_err_abs,AL0_err_abs,ns=1000,PARINI = [0.5, 0.25]):
        """ Compute the statistical error produced by random uncertainties on Ligand concentrations and chemical shifts 
            AR0_err_abs and AL0_err_abs are array containing absolute errors on protein and ligand concentrations.
            data_err_abs is an array that contains absolute uncertainties on chemical shifts
        """

        # Save initial values of protein and ligand concentrations and chemical shifts
        AR0_save = self.AR0
        AL0_save = self.AL0
        YCAL_save = self.YCAL
    
        # Initialize empty arrays to store results
        store_kd = []
        store_ppm = []
        store_AL0 = []
        store_YEXP = []

        for i in range(ns):
            self.AR0=add_noise_logn(AR0_save,AR0_err_abs)
            self.AL0=add_noise_logn(AL0_save,AL0_err_abs)
            self.YEXP=add_noise(YCAL_save,data_err_abs)
    
            PAROPT = self.fit(self.model0,PARINI)
            store_kd.append(PAROPT[0])
            store_ppm.append(PAROPT[1])
            store_AL0.append(self.AL0)
            store_YEXP.append(self.YEXP)

        store_kd=np.array(store_kd)
        store_ppm=np.array(store_ppm)

        return (store_kd,store_ppm,store_AL0,store_YEXP)

########################### END OF EQUI OBJECT #################################

def add_noise(X,Xerr):
    """ Add a gaussian noise to the array X 
        Returns a numpy array containing X+DX """
    if len(X) <> 0 and len(X) == len(Xerr):
        XD = []
        for abserr in Xerr:
            XD.append(abserr*gauss(0,1))
        DX = np.array(XD)
        return X+DX
    else:
        return None
    
def add_noise_logn(X,Xerr):
    """ Add noise to the array X according a Log-normal distribution
        Returns a numpy array containing X+DX """
    if len(X) <> 0 and len(X) == len(Xerr):
        XD = []
        j=0
        for abserr in Xerr:
            if (abserr == 0 or X[j] == 0):
                tmp=[X[j]]
            else:
                mu = np.log(X[j])
                sigma = abserr/X[j]
                tmp = np.random.lognormal(mu, sigma, 1)
            XD.append(tmp[0])
            j+=1
        return np.array(XD)
    else:
        return None

1. Experimental Kd measurement and uncertainty evaluation


In [41]:
a = equi() # Create an object for data fitting
# Load several data files and average
a.load("PK63.txt")
(exp_x, exp_y)= (a.AL0,a.YEXP)

PARINI = [10, 0.25] # Initial parameters for the fit
# Perform a first fit
PAROPT= a.fit(a.model0,PARINI)

# Estimate the error using Monte-Carlo Simulation
Y_err_abs = np.ones(a.npoints)*a.noise
AR0_err_abs = np.ones(a.npoints)*7  # Absolute errors on Receptor concentrations
AL0_err_abs = np.array([0.0, 0.6, 0.9, 1.7, 4.3, 8.7, 16.9, 33.3, 44.4, 82.7, 135.4]) # Absolute errors on Ligand concentrations

kd_val,ppm_val,x_val,y_val = a.monte_carlo(Y_err_abs,AR0_err_abs,AL0_err_abs,ns = 200)
semilogx(x_val,y_val,'.',color="0.8")

# Plot experimental values
p1 = semilogx(exp_x,exp_y,'or')

# Plot optimal curve (from PAROPT parameters)
b = equi()      # Create a second object to plot the model
R0 = a.AR0[0]   # get the experimental concentration of protein
b.AL0 = np.arange(0.0,4000.0,5.0)
NP = len(b.AL0)
b.AR0 = np.ones(NP)*R0
b.YEXP = np.zeros(NP)
d = b.model0(PAROPT)
p3 = semilogx(b.AL0,b.YCAL,'-k')
tick_params(labelsize='16')
axis([4,4000,-0.03,0.3])


print "Kd: %8.2f +/- %8.2f" % (kd_val.mean(),kd_val.std())
print "ppm: %8.3f +/- %8.4f " % (ppm_val.mean(),ppm_val.std())


show()


  12 fields in file PK63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
Kd:    52.47 +/-     3.14
ppm:    0.257 +/-   0.0010 

2. Effect of systematic error on the peptide concentrations


In [50]:
PARINI = [10, 0.25] # Initial parameters for the fit

rel_error = [0.6,0.8, 1, 1.2, 1.4]
colors = ['y.','r.','k.','g.','b.']
for i in range(len(rel_error)):
    a = equi()
    # Load several data files and average
    a.load("pk63.txt")
    a.AL0 = a.AL0*rel_error[i] # Introduce systematic error

    PAROPT= a.fit(a.model0,PARINI) # Perform a first fit
    Y_err_abs = np.ones(a.npoints)*a.noise
    AR0_err_abs = np.ones(a.npoints)*7             # Absolute errors on Receptor concentrations
    AL0_err_abs = np.ones(a.npoints)*a.AL0*0.1     # 10 % statistical error
    
    kd_val,ppm_val,x_val,y_val = a.monte_carlo(Y_err_abs,AR0_err_abs,AL0_err_abs,ns = 800)
    
    plot(kd_val,ppm_val,colors[i])
    print "error computed for systematic error of %3.1f " % rel_error[i]
    print "Kd: %8.2f +/- %8.2f" % (kd_val.mean(),kd_val.std())
    print "ppm: %8.3f +/- %8.4f " % (ppm_val.mean(),ppm_val.std())
    print " "
axis([0.0,120.,0.23,0.28])
show()


  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for systematic error of 0.6 
Kd:    19.61 +/-     3.79
ppm:    0.252 +/-   0.0030 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for systematic error of 0.8 
Kd:    35.56 +/-     4.25
ppm:    0.255 +/-   0.0019 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for systematic error of 1.0 
Kd:    52.11 +/-     5.43
ppm:    0.257 +/-   0.0018 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for systematic error of 1.2 
Kd:    69.79 +/-     6.12
ppm:    0.258 +/-   0.0018 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for systematic error of 1.4 
Kd:    87.37 +/-     7.27
ppm:    0.259 +/-   0.0019 
 

3. Effect of statistcal noise on peptide concentrations


In [58]:
PARINI = [10, 0.25] # Initial parameters for the fit

rel_error = [50.,40., 30., 20., 10.]
colors = ['y.','r.','g.','b.','k.']

for i in range(len(rel_error)):
    a = equi()
    # Load several data files and average
    a.load("pk63.txt")

    PAROPT= a.fit(a.model0,PARINI) # Perform a first fit
    
    Y_err_abs = np.ones(a.npoints)*a.noise
    AR0_err_abs = np.ones(a.npoints)*7             # Absolute errors on Receptor concentrations
    AL0_err_abs = np.ones(a.npoints)*rel_error[i]*a.AL0/100

    kd_val,ppm_val,x_val,y_val = a.monte_carlo(Y_err_abs,AR0_err_abs,AL0_err_abs,ns = 1000)
    
    plot(kd_val,ppm_val,colors[i])
    print "error computed for statistical error of %d " % rel_error[i]
    print "Kd: %8.2f +/- %8.2f" % (kd_val.mean(),kd_val.std())
    print "ppm: %8.3f +/- %8.4f " % (ppm_val.mean(),ppm_val.std())
    print " "
    
axis([0.0,120.,0.23,0.28])
show()


  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for statistical error of 50 
Kd:    52.33 +/-    22.29
ppm:    0.255 +/-   0.0069 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for statistical error of 40 
Kd:    53.92 +/-    19.76
ppm:    0.256 +/-   0.0058 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for statistical error of 30 
Kd:    52.54 +/-    14.62
ppm:    0.256 +/-   0.0045 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for statistical error of 20 
Kd:    52.01 +/-     9.94
ppm:    0.256 +/-   0.0031 
 
  12 fields in file pk63.txt 
First line :  64.4    0.0    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00
 

11 Values read 
error computed for statistical error of 10 
Kd:    52.34 +/-     5.58
ppm:    0.257 +/-   0.0017 
 

In [ ]: