In [5]:
%matplotlib inline

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
sns.set_style('ticks')

Depenencies for using pyIDEAS/Biointense:


In [7]:
# https://github.com/dfm/corner.py
import corner

In [8]:
import hydropy as hp

In [9]:
mpl.rcParams['font.size'] = 16
mpl.rcParams['axes.labelsize'] = 18
mpl.rcParams['xtick.labelsize'] = 16
mpl.rcParams['ytick.labelsize'] = 16

In [82]:
import pylab as p
p.rc('mathtext', default='it')

Respirometer model in pyIDEAS

The following respirometric model for aerobic degradation of acetate without biomass storage will be used (Gernaey, 2002). It predicts the model output variable exogenous oxygen uptake rate of bacteria: $OUR_{ex}$ ($mg L^{-1} d^{-1}$).

$$\frac{dS}{dt} = -(1-e^{-\frac{t}{\tau}})\mu_{max}\frac{1}{Y}\frac{S}{K_S + S}X$$$$\frac{dX}{dt} = (1-e^{-\frac{t}{\tau}})\mu_{max}\frac{1}{Y}\frac{S}{K_S + S}X-bX$$$$OUR_{ex} = (1-e^{-\frac{t}{\tau}})\mu_{max}\frac{1-Y}{Y}\frac{S}{S+k_S}X$$

with

$S$: substrate concentration ($mgCOD\; L^{-1}$ ),
$X$: biomass concentration ($mg COD\; L^{-1}$),
$\tau$: retardation on biomass activity ($d^{-1}$ ),
$Y$: yield of the biomass (-),
$\mu_{max}$: maximum growth rate ($d^{-1}$),
$K_s$: half-saturation Monod constant ($mg COD\; L^{-1}$)
$\quad$(if $\mu$ equals half of $\mu_{max}$, the substrate $S$ equals $K_s$).

Reference:

Gernaey, K., Petersen, B., Nopens, I., Comeau Y. and P.A. Vanrolleghem, Modeling aerobic carbon source degradation processes using titrimetric data and combined respirometric-titrimetric data: experimental data and model structure, Biotechnology and bioengineering, 79(7), 741-753, 2002

Using the pyIDEAS modelling environment:


In [10]:
from biointense.model import Model
from biointense.measurements import ode_measurements
from biointense.optimisation import ParameterOptimisation
from biointense.sensitivity import NumericalLocalSensitivity
from biointense.confidence import BaseConfidence

We define the model equations and set up the model:


In [11]:
system = {'dS': '-(1. - exp(-t/tau))*(1/Y)*mu_max*S/(S+K_S)*X',
          'dX': '(1. - exp(-t/tau))*(1/Y)*mu_max*S/(S+K_S)*X - b*X',
          'dDO': '369.73*(8.4-DO)-((1-Y)/Y)*(1.0 - exp(-t/tau))*mu_max*S/(S+K_S)*X',
          'OURex': '((1.- exp(-t/tau))*mu_max*(1-Y)/Y*S/(S+K_S)*X)/(24.*60.)'}

parameters = {'mu_max': 4., 'K_S': 0.4, 'b': 0.62, 'Y': 0.78,
              'tau':2.25e-04}

timestepping = np.linspace(0, 0.05, 1000)
respiro_model = Model('Respirometry', system, parameters)
respiro_model.set_independent({'t': timestepping})
respiro_model.set_initial({'S': 58.48, 'X': 675, 'DO':8.4})

respiro_model.initialize_model()
output = respiro_model.run()
axs = output.plot(subplots=True, figsize=(12, 8))
output.index = timestepping


Reading in the observations

Cierkens et al


In [12]:
observations = pd.read_csv("respirometer_data.txt", sep="\t", index_col=0, 
                           names=["DO", "OURex"], skiprows=2)
observations.index = observations.index/(60*24)  #everything to days
observations.index.name='t'

In [13]:
observations.head()


Out[13]:
DO OURex
t
0.000000 8.488234 0.006513
0.000010 8.488691 0.022208
0.000022 8.493058 0.037903
0.000035 8.495394 0.053598
0.000046 8.492392 0.069293

In [14]:
observations.plot(subplots=True, figsize=(12, 8))


Out[14]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f44d0926550>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7f44d0840550>], dtype=object)

In [25]:
0.015*24*60


Out[25]:
21.599999999999998

In [15]:
(observations.index*24*60)[1300]


Out[15]:
21.902864999999998

In [16]:
observations["OURex"].plot()
output["OURex"].plot()


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f44d078b4d0>

Decubber thesis


In [17]:
cubber_ref = pd.read_csv("../thesis_decubber/0508Aall_pieken.csv")[['NewTime','DO', 'OUR filt']]
cubber_ref = cubber_ref.set_index('NewTime')

In [18]:
cubber_ref.plot()


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f44d05796d0>

In [19]:
cubber1 = pd.read_csv("../thesis_decubber/0508A1_pieken.csv")[['NewTime','DO', 'OUR filt']]
cubber1 = cubber1.set_index('NewTime')
cubber5 = pd.read_csv("../thesis_decubber/0508A5_pieken.csv")[['NewTime','DO', 'OUR filt']]
cubber5 = cubber5.set_index('NewTime')

In [20]:
cubber1['DO'].plot()


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f44d0763f50>

In [21]:
cubber5['DO'].plot()


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f44d03b20d0>

So, we have three datasets to work with: observations, cubber1 and cubber5

INTERMEZZO - prepare runs brute/conservative MC

mumax and tau

# explore the parameter space N = 3 mumax_es = np.random.uniform(3.8, 4.1, N) K_s_es = np.random.uniform(0.1, 0.9, N) parameters = np.concatenate((np.atleast_2d(mumax_es), np.atleast_2d(K_s_es)), axis=0).transpose() SSE = np.zeros(N) modelruns = np.zeros((N, 2745)) for j in range(N): mu_max, K_S = parameters[j, :] modelled_our = respiro_optim._run_model(dof_array=np.array([mu_max, K_S]))["OURex"] residuals = modelled_our - \ respiro_optim.measurements.Data["OURex"] modelruns[j, :] = modelled_our SSE[j] = sum(residuals**2)
# modelruns.tofile('respiro_30000runs_4.txt') parameters.tofile('respiro_30000pars_4.txt') #SSE.tofile('respiro_30000SSE_4.txt') MPE.tofile('respiro_30000SSE_4.txt')

MC Filtering as an explorative tool

Checking behaviour of $\mu_{max}$ and K_S


In [23]:
def load_runs(ids=[4, 6], lengte=2745, N=30000):
    allruns = np.zeros((N*len(ids), lengte))
    allpars = np.zeros((N*len(ids), 2))
    allSSE = np.zeros(N*len(ids))
    for j,idrun in enumerate(ids):
        allruns[j*N:(j+1)*N, :] = np.fromfile('respiro_'+str(N) + 'runs_' +str(idrun) + '.txt').reshape(N, lengte)
        allpars[j*N:(j+1)*N, :] = np.fromfile('respiro_'+str(N) + 'pars_' +str(idrun) + '.txt').reshape(N, 2)
        allSSE[j*N:(j+1)*N] = np.fromfile('respiro_'+str(N) + 'SSE_' +str(idrun) + '.txt')
        
    return allruns, allpars, allSSE

In [ ]:
runs, pars, SSE = load_runs()

MC filtering


In [ ]:
from plot_functions_rev import Scatter_hist_withOF
import matplotlib.cm as cm

combine some output runs


In [ ]:
treshold = 1.

In [ ]:
pars_behav = pars[SSE < treshold]
SSE_behav = SSE[SSE<treshold]

In [ ]:
fig, axScatter, axHistx, axHisty,sc1 = Scatter_hist_withOF(pars[:,0], pars[:,1], data1b=pars_behav[:,0], 
                                                           data2b=pars_behav[:,1], xbinwidth = 0.02, ybinwidth=0.05, 
                                                           relative = False, SSE=SSE, SSEb=SSE_behav, vmin=0.7, 
                                                           vmax=treshold*3, colormaps=cm.Greys_r, 
                                                           cleanstyle = True, 
                                                           roodlichter=0.5)
axScatter.set_xlabel(r'$\mu_{max}$',fontsize=16)
axScatter.set_ylabel(r'$K_S$',fontsize=16)
cbar = fig.colorbar(sc1, ax=axScatter, cmap='Greys', orientation='vertical',
                    ticks=[0.7, treshold, treshold*3], shrink=1.)
txts = cbar.ax.set_yticklabels(['0.7','<'+str(treshold),'> '+str(treshold*3)])
#plt.savefig('scatterhist1.pdf')
#plt.savefig('scatterhist1.png', dpi=300)

In [ ]:
fig, axScatter, axHistx, axHisty,sc1 = Scatter_hist_withOF(pars[:,0], pars[:,1], data1b=pars_behav[:,0], 
                                                           data2b=pars_behav[:,1], xbinwidth = 0.02, ybinwidth=0.05, 
                                                           relative = False, SSE=SSE, SSEb=SSE_behav, vmin=0.7, 
                                                           vmax=treshold, colormaps=cm.Greys_r, 
                                                           cleanstyle = True)
axScatter.set_xlabel(r'$\mu_{max}$',fontsize=16)
axScatter.set_ylabel(r'$K_S$',fontsize=16)
cbar = fig.colorbar(sc1, ax=axScatter, cmap='Greys', orientation='vertical',
                    ticks=[0.7, treshold], shrink=1.)
txts = cbar.ax.set_yticklabels(['0.7','<'+str(treshold)])
#plt.savefig('scatterhist2.pdf')
#plt.savefig('scatterhist2.png', dpi=300)

An alternative objective function


In [ ]:
def SSE_part(modelled, observed, slicet):
    residuals = modelled[:slicet] - observed[:slicet]   
    return sum(residuals**2)

In [ ]:
runs.shape

In [ ]:
sse_short = np.apply_along_axis(SSE_part, 1, runs, respiro_optim.measurements.Data["OURex"].values, 1300)

In [ ]:
plt.hist(sse_short)

In [ ]:
treshold_short=0.3
pars_behav_short = pars[sse_short < treshold_short]
SSE_behav_short = sse_short[sse_short<treshold_short]

In [ ]:
sse_short.max()

In [ ]:
SSE_behav_short.min()

In [ ]:
fig, axScatter, axHistx, axHisty,sc1 = Scatter_hist_withOF(pars[:,0], pars[:,1], xbinwidth = 0.02, ybinwidth=0.05, 
                                                           relative = False, SSE=sse_short, vmin=0.16, vmax=3.2, 
                                                           colormaps=cm.Greys_r, 
                                                           cleanstyle = True)
axScatter.set_xlabel(r'$\mu_{max}$',fontsize=16)
axScatter.set_ylabel(r'$K_S$',fontsize=16)
cbar = fig.colorbar(sc1, ax=axScatter, cmap='Greys', orientation='vertical', shrink=1.)
#txts = cbar.ax.set_yticklabels(['0.05', '<'+str(treshold_short)])
#plt.savefig('scatterhist_start.pdf')
#plt.savefig('scatterhist_start.png', dpi=300)

In [ ]:
fig, axScatter, axHistx, axHisty,sc1 = Scatter_hist_withOF(pars[:,0], pars[:,1], data1b=pars_behav_short[:,0], 
                                                           data2b=pars_behav_short[:,1], xbinwidth = 0.02, ybinwidth=0.05, 
                                                           relative = False, SSE=sse_short, SSEb=SSE_behav_short, 
                                                           vmin=0.16, vmax=0.32, colormaps=cm.Greys_r, 
                                                           cleanstyle = True)
axScatter.set_xlabel(r'$\mu_{max}$',fontsize=16)
axScatter.set_ylabel(r'$K_S$',fontsize=16)
cbar = fig.colorbar(sc1, ax=axScatter, cmap='Greys', orientation='vertical',
                    ticks=[0.16, treshold_short], shrink=1.)
txts = cbar.ax.set_yticklabels(['0.16', '<'+str(treshold_short)])
plt.savefig('scatterhist_start.pdf')
plt.savefig('scatterhist_start.png', dpi=300)

Confronting the measurements and the model in the optimization class:

Focusing on the OUR measurements only:


In [65]:
respiro_data = ode_measurements(pd.DataFrame(observations["OURex"]))

Adding a relative measurement error for the measurements (based on the noise of the measurement):


In [66]:
respiro_data.add_measured_errors({'OURex': 0.01})

We assume that only the kinetic parameters are uncertain and need to be estimated:


In [67]:
respiro_optim = ParameterOptimisation(respiro_model, respiro_data, optim_par=['mu_max','K_S', 'tau'])

Running a local optimization (actually maximuma likelihood special case situation):


In [175]:
respiro_optim.local_optimize(pardict={'mu_max':3., 'K_S':0.6, 'tau': 2.26e-4}, obj_crit='wsse', method='Nelder-Mead')


Out[175]:
  status: 0
    nfev: 462
 success: True
     fun: 12333014.088299507
       x: array([  3.91223097e+00,   3.30235455e-01,   2.59862865e-04])
 message: 'Optimization terminated successfully.'
     nit: 237

In [176]:
#mumax_es = np.random.uniform(1.0, 4., N)
#K_s_es = np.random.uniform(0.1, 0.8, N)

In [180]:
plt.plot(respiro_optim.modmeas["Measured"]["OURex"])
plt.plot(respiro_optim.modmeas["Modelled"]["OURex"])
respiro_optim.dof
myrun = respiro_optim._run_model(dof_array=np.array([3.9, 0.4]))



In [35]:
#myrun

Exploration of using pyIDEAS in emcee Hammer: posterior of $\mu_{max}$ and $K_S$


In [52]:
import emcee

In [56]:
def respiro_get_residuals(parameters):
    """run respiromodel and calculate the residuals as a numpy.array
    """
    mu_max, K_S = parameters
    residuals = respiro_optim._run_model(dof_array=np.array([mu_max, K_S]))["OURex"] - \
                                respiro_optim.measurements.Data["OURex"]
    return residuals.values

In [57]:
respiro_get_residuals([3.9, 0.4])


Out[57]:
array([-0.00651276,  0.00072707,  0.00955954, ..., -0.00864222,
       -0.00939906, -0.01015596])

In [58]:
def lnprior(parameters):
    mu_max, K_S = parameters
    if 3.5 < mu_max < 4.2 and 0.1 < K_S < 0.9:
        return 0.0
    return -np.inf

In [59]:
# gaussian, geen meetfout,...
def lnlike(parameters):
    residuals = respiro_get_residuals(parameters)
    observed = respiro_optim.measurements.Data["OURex"].values
    N = len(observed)
    sigma2 = np.std(observed)**2.
    loglike = -N*np.log(2*np.pi)/2. - N*np.log(sigma2)/2. - ((residuals)**2).sum()/(2*sigma2)
    return loglike

In [60]:
def lnprob(parameters):
    lp = lnprior(parameters)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(parameters)

In [42]:
ndim, nwalkers = 2, 100
pos = [[3.9, 0.32] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]

In [43]:
respiro_optim.measurements.Data["OURex"].values


Out[43]:
array([ 0.00651276,  0.02220774,  0.03790266, ...,  0.00864222,
        0.00939906,  0.01015596])

In [44]:
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
sampl_out = sampler.run_mcmc(pos, 500)

In [45]:
fig, ax = plt.subplots(2, 1)
for i in range(100):
    ax[0].plot(sampler.chain[i, :, 0])
    ax[1].plot(sampler.chain[i, :, 1])
ax[0].set_ylabel(r'$\mu_{max}$')
ax[1].set_ylabel(r'$K_S$')


Out[45]:
<matplotlib.text.Text at 0x7f8cf482d610>

In [46]:
print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))


Mean acceptance fraction: 0.723

In [47]:
samples = sampler.chain[:, 200:, :].reshape((-1, ndim))

In [48]:
import corner

In [49]:
import corner
fig = corner.corner(samples, labels=["$\mu_{max}$", "$K_S$"],
                    quantiles=None, plot_contours=True)
fig.savefig('test.pdf')



In [ ]:
sampler.acor

In [ ]:
temp = fig.get_axes()[2]
xlims = temp.get_xlim()
ylims = temp.get_ylim()

In [ ]:
xlims, ylims

Parameter estimation of the parameters $\mu_{max}$, $K_S$ and $\tau$

Bayesian estimation of the parameter posterior distribution

Helper function to link the model and errors -> residuals


In [69]:
def respiro_get_residuals2(parameters):
    """run respiromodel and calculate the residuals as a numpy.array
    """
    mu_max, K_S, tau = parameters
    residuals = respiro_optim._run_model(dof_array=np.array([mu_max, K_S, tau]))["OURex"] - \
                                respiro_optim.measurements.Data["OURex"]
    return residuals.values

Assuming Gaussian errors, without any measurement error:


In [70]:
# gaussian, geen meetfout,...
def lnlike2(parameters):
    residuals = respiro_get_residuals2(parameters)
    observed = respiro_optim.measurements.Data["OURex"].values
    N = len(observed)
    sigma2 = np.std(observed)**2.
    loglike = -N*np.log(2*np.pi)/2. - N*np.log(sigma2)/2. - ((residuals)**2).sum()/(2*sigma2)
    return loglike

Uniform priors within specified regions:


In [71]:
def lnprior2(parameters):
    mu_max, K_S, tau = parameters
    if 3.5 < mu_max < 4.2 and 0.1 < K_S < 0.9 and 0.000226*0.5 < tau < 0.001*1.5:
        return 0.0
    return -np.inf

Combining the probabilities of the prior and the gaussian likihood function:


In [72]:
def lnprob2(parameters):
    lp = lnprior2(parameters)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike2(parameters)

Applying the emcee Hammer library to sample the posterior:


In [73]:
ndim, nwalkers = 3, 100
pos = [[3.9, 0.32, 2.25e-04] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]

In [74]:
sampler2 = emcee.EnsembleSampler(nwalkers, ndim, lnprob2)
sampl_out2 = sampler2.run_mcmc(pos, 500)

Check the different sampler chains:


In [173]:
fig, ax = plt.subplots(3, 1, figsize=(12, 8), sharex=True)
for i in range(100):
    ax[0].plot(sampler2.chain[i, :, 0], c='0.3', linewidth=0.2)
    ax[1].plot(sampler2.chain[i, :, 1], c='0.3', linewidth=0.2)
    ax[2].plot(sampler2.chain[i, :, 2], c='0.3', linewidth=0.2)
ax[0].set_ylabel(r'$\mu_{max}$')
ax[0].set_yticks([3.8, 3.9, 4., 4.1])
ax[1].set_ylabel(r'$K_S$')
ax[1].set_yticks([0.1, 0.4, 0.7])
ax[2].set_ylabel(r'$\tau$')
ax[2].set_yticks([-0.0, 0.0002, 0.0004])
plt.savefig("mcmc_samples.pdf")



In [77]:
samples = sampler2.chain[:, 200:, :].reshape((-1, ndim))

In [144]:
import corner
fig= corner.corner(samples, labels=["$\mu_{max}$", "$K_S$", r"$\tau$"],
                    quantiles=None, plot_contours=True)
#fig.savefig('test.pdf')
axs = fig.get_axes()
#axs[8].xaxis.get_major_formatter().set_powerlimits((0, 1))

scale_pow = 4
from matplotlib import ticker
from matplotlib.ticker import FuncFormatter

def my_formatter_fun(x, p):
    return "%.1f" % (x * (10 ** scale_pow)) + ' 10$^{{{0:d}}}$'.format((-1*scale_pow))
axs[6].get_yaxis().set_major_formatter(ticker.FuncFormatter(my_formatter_fun))
axs[8].get_xaxis().set_major_formatter(ticker.FuncFormatter(my_formatter_fun))

for ax in axs:
    ax.yaxis.set_label_coords(-0.4, 0.5)
    ax.xaxis.set_label_coords(0.5, -0.5)
    
for tick in axs[6].get_yaxis().get_major_ticks():
    tick.set_pad(-2)
    tick.label1 = tick._get_text1()
plt.setp(axs[6].yaxis.get_majorticklabels(), rotation=45 )

#axs[6].yaxis.get_major_formatter().set_scientific(True)

#axs[6].set_yticklabels(['1.6e$^{-4}$','2.4e$^{-4}$','3.2e$^{-4}$','4.0e$^{-4}$'])
plot_margin = 0.25

plt.savefig('mcmc_result_respirometry.pdf', bbox_inches='tight')



In [174]:
sampler2.acor


Out[174]:
array([ 68.02049256,  66.92189665,  42.92611976])

Maximum Likelihood estimation


In [160]:
import scipy.optimize as op
nll = lambda *args: -lnlike2(*args)
result = op.minimize(nll, [3.9, 0.3, 2.25e-04], method='Nelder-Mead')
m_ml, b_ml, lnf_ml = result["x"]

In [161]:
result


Out[161]:
  status: 0
    nfev: 97
 success: True
     fun: -1275.6599206349847
       x: array([  3.93672964e+00,   4.55131937e-01,   2.15231754e-04])
 message: 'Optimization terminated successfully.'
     nit: 52

In [162]:
m_ml, b_ml, lnf_ml


Out[162]:
(3.9367296379326135, 0.45513193684773456, 0.00021523175445251703)

This does indeed agree with the density regions found by the markov chain sampler

Check the resulting model calibration result:


In [183]:
plt.plot(respiro_optim.modmeas["Measured"]["OURex"])
myrun = respiro_optim._run_model(dof_array=np.array([3.93, 0.45, 0.000215]))
myrun.OURex.plot()


Out[183]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f44be0fcf90>

In [ ]: