pk_milk demo - basic functionality

Package import


In [9]:
import os
import sys
import numpy as np
import pandas as pd
from scipy import stats
from scipy import integrate
import scipy.interpolate as interpolate
import matplotlib.pyplot as plt
%matplotlib inline 
from scipy.optimize import curve_fit

sys.path.append("..") # add system path for above directory. 
from pk_milk import pk_milk

Class argument explanation

  • gens=1 :: # of generations. Gen 1 is the mother. Each subsequent gen is composed of daughters that become mothers.
  • y_end=100 :: Simulation end year.
  • lifespan=80 :: Lifespan of each generation (e.g., gen 1 is born in year 0 and dies in year 80).
  • brth_age=25 :: Age at which each generation gives birth. A birth schedule can be input, as well, using an embedded class function.
  • average_lact_time=[0.5]*5 :: an array (must have at least as many elements as # of gens), describing the lactation time period in years.
  • k_lac=[1e-2] * 5 :: an array (must have at least as many elements as # of gens), describing the 1st order lactation kinetic rate coefficient. time period in years [1/years].
  • k_elim=[np.log(2) / 5] * 5 :: an array (must have at least as many elements as # of gens), describing the 1st order congener elemination kinetic rate coefficient [1/years]. Kinetics can also be determined by inspection of the biomonitoring data for an individual congener, as well.
  • odes_in_each_generation=2 :: the number of ode's in each gen (e.g., 2: one for body mass and one for concentration of congener in person).

Problem statement

In this example, let's solve some problems:

  1. What is the time concentration profile of a congener in each of the 5 generations since the introduction of the chemical?
  2. Which generation is at the highest risk?
  3. How does the concentration of a congener vary throughout each generation (e.g., what is the concentration of the congener for 20 year olds of each generation)?

initialize the pk_milk class.

  1. assume the birth age is 25, so 5 generations is 125 years for the 5th gen to be born. For a lifespan of 80 years, this means that the simulation should end at: 125 + 80 or 205 years.
  2. assume that the average lactation time for each generation is 10 months.
  3. assume that the 1st order lactation coefficient is 1e-1 [1/years].
  4. assume that the 1st order kinetic elimination coefficient is log(2)/5 [1/years]. Note: k_elim can also be calculated for each congener, which will be demonstrated.
  5. assume that there are 2 ode's per generation: one for fat mass and one for chemical in fat mass.

In [10]:
pk_milk_base = pk_milk(gens=3, y_end=230,lifespan=80, 
                       brth_age = 25, average_lact_time=[10/12]*5,
                      k_lac=[1e-2] * 5, k_elim=[np.log(2) / 5] * 5)

In [11]:
# calculate and set the delta_t. 
pk_milk_base.dt_from_timesteps_(timestep_variable=10, method='timesteps_per_month')


new timesteps/month: 10
new delta_t: 0.1
Out[11]:
<pk_milk.pk_milk at 0x10ae96a20>

In [12]:
# calculate and set the number of simulation timesteps
pk_milk_base.n_steps_()


calculated n_steps: 2301.0
Out[12]:
<pk_milk.pk_milk at 0x10ae96a20>

Calculating kinetics data

calculate the kinetics for a congener for a single congener


In [13]:
# grab some data
cwd =os.getcwd()
congener_biomonitoring_data_location = os.path.join(cwd,'example_data','mikes_et_al_2012.csv')
biomonitoring_data = pd.read_csv(congener_biomonitoring_data_location)

biomonitoring_data_years = biomonitoring_data.ix[:,0]
biomonitoring_data_congener = biomonitoring_data.ix[:,1] 

# compile to matrix, use transpose if neccessary to get the right format. 
biomonitoring_data_matrix = np.matrix([biomonitoring_data_years,biomonitoring_data_congener]).T

col_year = np.array(biomonitoring_data_matrix[:, 0]).flatten('C')
col_congener = np.array(biomonitoring_data_matrix[:, 1]).flatten('C')
year_one = col_year[0]
      
# the slope is the kinetics 
# from the class file, the lin2exp method returns: adj_year, np.polyval([slope, intercept], adj_year), slope, r_value

(x_adj_year, 
 y_congener_fitted, 
 k_congener, r_value) = pk_milk_base.biomonitoring_eval_(biomonitoring_data=biomonitoring_data_matrix)

print('k_congener_elimination:', k_congener, 'r^2: ', r_value)

# plot the kinetics: 

x_shift = list(map(lambda x: x + year_one, x_adj_year))
plt.plot(x_shift, col_congener, color='red', marker='o', linestyle='',
        label = 'biomonitoring data')
plt.plot(x_shift, np.exp(y_congener_fitted),
         color='blue', marker='', linestyle='-',label = 'fitted')
plt.legend()
plt.xlabel('[year]')
plt.ylabel('Congener concentration  [kg/kg]')
plt.show()


k_congener_elimination: 0.180809023339 r^2:  0.887653121198

calculate the kinetics for a congener for all congeners


In [14]:
# grab some data
cwd =os.getcwd()
congener_biomonitoring_data_location = os.path.join(cwd,'example_data','mikes_et_al_2012.csv')
biomonitoring_data = pd.read_csv(congener_biomonitoring_data_location)

biomonitoring_data_years = biomonitoring_data.ix[:,0]

# find the total number of congeners. 
list_of_congeners = list(biomonitoring_data)[1:] # first value is the 'year' header

# define a color map
colors = plt.cm.rainbow(np.linspace(0, 1, len(list_of_congeners)))

for congener,c in zip(list_of_congeners, colors):
    # create import matrix with single congener
    biomonitoring_data_congener = biomonitoring_data[congener] 
    biomonitoring_data_matrix = np.matrix([biomonitoring_data_years,biomonitoring_data_congener]).T

    col_year = np.array(biomonitoring_data_matrix[:, 0]).flatten('C')
    col_congener = np.array(biomonitoring_data_matrix[:, 1]).flatten('C')
    year_one = col_year[0]
    
    (x_adj_year, 
     y_congener_fitted, 
     k_congener, r_value) = pk_milk_base.biomonitoring_eval_(biomonitoring_data=biomonitoring_data_matrix)
    
    print('congoner:', congener, 'k_elim_congener: ', k_congener,'r_value', r_value)

    x_shift = list(map(lambda x: x + year_one, x_adj_year))
    plt.plot(x_shift, col_congener, color=c, marker='o', linestyle='')
    plt.plot(x_shift, np.exp(y_congener_fitted),
             color=c, marker='', linestyle='-', label = congener)
plt.legend()
plt.xlabel('[year]')
plt.ylabel('Congener concentration  [kg/kg]')
plt.show()


congoner: hcb k_elim_congener:  0.180809023339 r_value 0.887653121198
congoner: ppddt k_elim_congener:  -0.219255807619 r_value -0.731075181687
congoner: ppdde k_elim_congener:  0.0607780233246 r_value 0.625898089212
congoner: betahch k_elim_congener:  0.116200052557 r_value 0.803213402007
congoner: gammahch k_elim_congener:  0.0744602731868 r_value 0.444089964944
congoner: pcb138 k_elim_congener:  0.111532467333 r_value 0.916685949159

Determining lipid mass from bodyweight and lipid fraction.


In [17]:
# import matrix 'bodyweight_and_lipid_fraction_data'
cwd =os.getcwd()
bodyweight_and_lipid_fraction_location = os.path.join(cwd,'example_data','monthly_bodyweight_and_lipid_fraction.csv')
bodyweight_and_lipid_fraction_dataframe = pd.read_csv(bodyweight_and_lipid_fraction_location)
bodyweight_and_lipid_fraction_matrix = bodyweight_and_lipid_fraction_dataframe.as_matrix()

lipid_mass_array = pk_milk_base.lipid_mass_from_bw_and_lipid_fraction(bodyweight_and_lipid_fraction_matrix)

Create, or provide your own, intake intensity curve.


In [20]:
# the curve is then a callable function that can be imported into the mass balance
pk_milk_base.intake_intensity_curve_(method='asymettric_exp_up_and_down', year_peak = 56, peak_intensity=80e-3)

# this function is stored in the pk_milk_base class (or whatever you name it) and can be recalled/plotted 
x_simulation = np.linspace(pk_milk_base.y_start, pk_milk_base.y_end, pk_milk_base.n_steps)
plt.plot(x_simulation, pk_milk_base.intake_intensity_curve(x_simulation))
plt.xlabel('Simulation year from start [year]')
plt.ylabel('Congener intake intensity [ng/(kg.d)]')
plt.show()


/usr/local/lib/python3.5/site-packages/scipy/stats/_stats_mstats_common.py:97: RuntimeWarning: invalid value encountered in double_scalars
  sterrest = np.sqrt((1 - r**2) * ssym / ssxm / df)

Generate the age timeline data.

Because multi-generational math requires keeping track of when each generation was born (or died), employ the agetimeline function.

After this data has been generated, employ the agesplines function to create age spline array that holds the continuous functions describing the mass of fat in the body. The agetimeline function needs to be performed first, because it sets into the class several timeline descriptors based on the provided birth schedule and lifespan.


In [21]:
pk_milk_base.age_timeline_()
pk_milk_base.age_splines_(lipid_mass_array=lipid_mass_array) # lipid_mass_array defined above.

# if you want to plot the age splines output, you can. However, make sure not to plot the derivatives!
for gen in range(0, pk_milk_base.gens):
    plt.plot(x_simulation, pk_milk_base.age_spline[gen](x_simulation))
plt.xlabel('Simulation year from start [year]')
plt.ylabel('Mass of Fat in body [kg]')
plt.show()


default interval age at which mother gives birth:  25 years
calculated birth time-table based on birth age: [  0.  25.  50.  75.] years
generational birth schedule: [25.0, 50.0, 75.0, 100.0] years
generational death schedule: [80.0, 105.0, 130.0, 155.0] years

run the generation_mass_balance

now that everything is set, feel free to run the generation_mass_balance function.


In [22]:
# run the generation mass balance. Turn on quickplot to just plot the data. 
pk_milk_base.generation_mass_balance(quickplot = True)

# data can also be extracted by calling the dydt_solution function for each mass balance: 
# plot only the mass compartments for each generation
for gen in range(0, pk_milk_base.gens*pk_milk_base.odes_in_each_generation)[::2]:
    plt.plot(x_simulation, pk_milk_base.dydt_solution[gen][:])
    plt.xlim(x_simulation[0],x_simulation[-1])
plt.xlabel('Simulation year from start [year]')
plt.ylabel('Mass of Fat in body [kg]')
plt.show()


Answering Q's with the model

  1. What is the time concentration profile of a congener in each of the 5 generations since the introduction of the chemical?
  2. Which generation accumulates the highest concentration of congener?
  3. How does the concentration of a congener vary throughout each generation (e.g., what is the concentration of the congener for 20 year olds of each generation)?

In [23]:
#1. time concentration concentration profile of a single congener in each of the 5 generations
# plot only the chemical mass compartments for each generation
for gen in range(0, pk_milk_base.gens*pk_milk_base.odes_in_each_generation)[1::2]:
    plt.plot(x_simulation, pk_milk_base.dydt_solution[gen][:])
    plt.xlim(x_simulation[0],x_simulation[-1])
    plt.yscale('log')
plt.xlabel('Simulation year from start [year]')
plt.ylabel('Congener concentration in fat kg_cong./kg_bm')
plt.show()



In [24]:
#2.0 Which generation accumulates the highest concentration of a congener. 
max_y =[]
max_x =[]
for gen in range(0, pk_milk_base.gens*pk_milk_base.odes_in_each_generation)[1::2]:
    max_y.append(max(pk_milk_base.dydt_solution[gen][:]))
    max_x.append(x_simulation[pk_milk_base.dydt_solution[gen][:].argmax()])
conc_year_max = pd.DataFrame(np.matrix([max_y,max_x]).T, columns = ['cong_conc','simulation_year'])

#2.1. Calculate the highest concentration of congener in each generation.
print(conc_year_max)

#2.2. Calculate the highest concentration of congener in all generatiosn. 
print('maximum congener concentration and year:')
print(conc_year_max.loc[conc_year_max['cong_conc'].idxmax()])


   cong_conc  simulation_year
0   0.553061             56.5
1   0.536528             56.7
2   0.439806             58.4
maximum congener concentration and year:
cong_conc           0.553061
simulation_year    56.500000
Name: 0, dtype: float64

In [25]:
# 3. How does the concentration of a congener vary throughout each generation 
#(e.g., what is the concentration of the congener for 20 year olds of each generation)?
total_y =[]
for gen in range(0, pk_milk_base.gens*pk_milk_base.odes_in_each_generation)[1::2]:
    total_y.append(pk_milk_base.dydt_solution[gen][:])

all_totals = pd.DataFrame(total_y).T
n_steps = np.int(pk_milk_base.n_steps)

# create a column that tracks the real year. 
all_totals['year'] = np.arange(0,n_steps,1)*(pk_milk_base.delta_t)
# print (all_totals)

# vector of concentrations where everyone is the same age
static_age_selected = []
age_array = 20 # years
for gen in range(1,pk_milk_base.gens+1): 
    static_age_selected.append(gen*age_array)

#initialize a dataframe to store the concentrations at each age
concentration_at_static_age_selected = pd.DataFrame()
for gen in range(0,pk_milk_base.gens): 
    #select rows whose year value is equivalent to the generation static age selected
    concentration_at_static_age_selected = concentration_at_static_age_selected.append(all_totals.loc[all_totals['year']==static_age_selected[gen]])

# convert dataframe to matrix for convenience 
concentration_at_static_age_selected_matrix = concentration_at_static_age_selected.as_matrix()

# the diagonals are the concentrations at each age
print (concentration_at_static_age_selected_matrix.diagonal())
# print(np.arange(0,pk_milk_base.gens))
plt.plot(np.arange(0,pk_milk_base.gens),concentration_at_static_age_selected_matrix.diagonal(),'--o')
plt.semilogy()
plt.ylabel('Congener concentration @ age = ' + str(age_array))
plt.show()


[  2.97560250e-13   1.54688789e-06   4.04497319e-01]

In [26]:
# now do it for all years in between 10 and 80 by 5 year increments.

In [33]:
total_y =[]
for gen in range(0, pk_milk_base.gens*pk_milk_base.odes_in_each_generation)[1::2]:
    total_y.append(pk_milk_base.dydt_solution[gen][:])

all_totals = pd.DataFrame(total_y).T
n_steps = np.int(pk_milk_base.n_steps)

# create a column that tracks the real year. 
all_totals['year'] = np.arange(0,n_steps,1)*(pk_milk_base.delta_t)
# print (all_totals)

# vector of concentrations where everyone is the same age
static_age_selected = []
age_start = 20
age_finish = 80
age_step = 5

legend_labels = []
for age in np.arange(age_start,age_finish, age_step):  # years
    static_age_selected = []
    for gen in range(1,pk_milk_base.gens+1): 
        static_age_selected.append(gen*age)
    
    #initialize a dataframe to store the concentrations at each age
    concentration_at_static_age_selected = pd.DataFrame()
    for gen in range(0,pk_milk_base.gens): 
        #select rows whose year value is equivalent to the generation static age selected
        concentration_at_static_age_selected = concentration_at_static_age_selected.append(
            all_totals.loc[all_totals['year']==static_age_selected[gen]])
        
    # convert dataframe to matrix for convenience 
    concentration_at_static_age_selected_matrix = concentration_at_static_age_selected.as_matrix()

    # the diagonals are the concentrations at each age
    
    plt.plot(np.arange(0,pk_milk_base.gens),concentration_at_static_age_selected_matrix.diagonal(),'--o')
    plt.semilogy()
    legend_labels.append('Cong. Conc. @ age = ' + str(age) + ' years')
    
plt.legend(legend_labels, bbox_to_anchor=(2, 1.05))
plt.ylabel('Congener concentration')
plt.xlabel('Generation')
plt.show()



In [ ]:


In [ ]: