In [1]:
import lifemodels as lms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
%matplotlib inline
os.chdir("/Users/user/Work/lifemodels/examples")
lifemodelslifemodels is a Python package for fitting lifespan data to a number of models using MLE. It also implements GLM (generalized linear model) for those lifespan distributions.
It is the next version of deday. deday stands for Demography Data Analyses. The older versions can be found here.
Read the minimal example for a simple workflow in IPython Notebook
Read the documentation for instructions.
If you use deday for published research, please contact me as lifemodels has not yet been published.
To install, run pip install git+https://github.com/ctzhu/lifemodels
• Benjamin Gompertz & William Makeham.
• Gompertz Makeham law of mortality: $H(t)=\alpha e^{\beta t} + C$
• Example: Estimated probability of a person dying at each age, for the U.S. in 2003
| |
|
|
|---|
.survt_df() methodThe effect of retarded growth upon the length of life span and upon the ultimate body size. J Nutr. 1935;10:63–79
Diet and Sex. III type diet being the richest.
In [2]:
df = pd.read_csv('mckay.txt', sep=' ')
df['Time1'] = df.Time0
#Time-to-event data, Time0 is the left bound, Time1 is the right bound.
s_df = lms.s_models.survt_df(df)
df.head()
Out[2]:
In [3]:
gp2_model = lms.s_models.distfit_df(s_df, 'gp2')
gp3_model = lms.s_models.distfit_df(s_df, 'gp3')
wb2_model = lms.s_models.distfit_df(s_df, 'wb2')
wb3_model = lms.s_models.distfit_df(s_df, 'wb3')
gl4_model = lms.s_models.distfit_df(s_df, 'gl4')
$H(t)=C$
Exponential Decay
$H(t)=(\kappa/\lambda)\cdot(t/\lambda)^{\kappa-1}$
$log(H(t))=log(\kappa \cdot \lambda^{-\kappa})+(\kappa-1)\cdot log(x)$
Weibull model (Mortality increase with time in a power function relationship) 'wb2'
$H(t)=\alpha e^{\beta t}$
$log(H(t))=log(\alpha)+ \beta t $
Gompertz model (Mortality increase with time Exponentially) 'gp2'
$H(t)=\alpha e^{\beta t} - \alpha + C$
Gompertz-Makehamm model 'gp3'
$H(t)=(\kappa/\lambda)\cdot(t/\lambda)^{\kappa-1} + C$
Weibull-Makehamm model 'wb3'
$H(t)=\frac{\alpha e^{\beta t}}{d e^{\beta t} + 1} - \frac{\alpha}{d+1} + C$
Gompertz-logistic model 'gl4'
In [4]:
gp3_model.mdl_all_free
Out[4]:
In [5]:
gp3_model.mdl_all_cnst
Out[5]:
In [6]:
gp3_model.var
Out[6]:
.logq() method.logq(key, q), key is genotype-food combination here. q is that target % of death, default to 50%, therefore by default it returns the estimate for medium lifespan.
.logq() method and .logq_nc() method are similar: the latter ignores the Makehamm term ($C$), if there is one.
In [7]:
(gp3_model.logq(('I', 'Female')), gp3_model.logq_nc(('I', 'Female')))
Out[7]:
In [8]:
medium_df = pd.DataFrame({'With_C' : map(gp3_model.logq, gp3_model.mdl_all_free.index),
'Without_C': map(gp3_model.logq_nc, gp3_model.mdl_all_free.index)},
index=gp3_model.mdl_all_free.index)
np.exp(medium_df)
Out[8]:
In [9]:
(gp3_model.logm(('I', 'Female')), gp3_model.logm_nc(('I', 'Female')))
Out[9]:
In [10]:
gp3_model.logt_var(('I', 'Female'), 6.6869775131919171)
Out[10]:
In [11]:
f, axes = plt.subplots(2, 3, figsize=(14, 8), sharex=True, sharey=True)
axes = axes.ravel()
for ax, key in zip(axes, gp3_model.mdl_all_free.index.tolist()):
gp3_model.plot(key, np.linspace(0,1400,100),ax,'hzd')
ax.set_ylim((-10,-2))
ax.set_title(key)
plt.suptitle('log-harzard plot', fontsize=20)
Out[11]:
In [12]:
f, axes = plt.subplots(2, 3, figsize=(14, 8), sharex=True, sharey=True)
axes = axes.ravel()
for ax, key in zip(axes, gp3_model.mdl_all_free.index.tolist()):
gp3_model.plot(key, np.linspace(0,1400,100),ax,'sf')
ax.set_ylim((-0.01,1.01))
ax.set_title(key)
plt.suptitle('survival plot', fontsize=20)
Out[12]:
In [13]:
f, (ax, cax) = plt.subplots(1, 2, gridspec_kw={'width_ratios':[20,1], 'wspace': 0.5})
wb2_model.contourf_2D(('I','Female'),
ax, cax,
(np.linspace(-5, 5, 41),
np.linspace(-4, 4, 45)))
plt.suptitle('Weibull Model 2D Conf. Region')
Out[13]:
In [14]:
f, (ax, cax) = plt.subplots(1, 2, gridspec_kw={'width_ratios':[20,1], 'wspace': 0.5})
gp2_model.contourf_2D(('I','Female'),
ax, cax,
(np.linspace(-5, 5, 41),
np.linspace(-4, 4, 45)))
plt.suptitle('Gompertz Model 2D Conf. Region')
Out[14]:
In [15]:
f, (ax, cax) = plt.subplots(1, 2, gridspec_kw={'width_ratios':[20,1], 'wspace': 0.5})
gp2_model.contourf_2D(('I','Female'),
ax, cax,
(np.linspace(-5, 5, 41),
np.linspace(-4, 4, 45)),
levels=np.linspace(150,5000,10))
plt.suptitle('Gompertz Model 2D Conf. Region')
Out[15]:
In [16]:
f, (ax1, ax2, ax3, cax) = plt.subplots(1, 4,
gridspec_kw={'width_ratios':[20,20,20,1], 'wspace': 0.5},
figsize=(16,4))
gp3_model.contourf_2D(('I','Female'),
ax1, cax,
(np.linspace(-5, 5, 41),
np.linspace(-4, 4, 45),
np.linspace(0,0,1)))
gp3_model.contourf_2D(('I','Female'),
ax2, cax,
(np.linspace(-5, 5, 41),
np.linspace(0,0,1),
np.linspace(-4, 4, 45)))
gp3_model.contourf_2D(('I','Female'),
ax3, cax,
(np.linspace(0,0,1),
np.linspace(-5, 5, 41),
np.linspace(-4, 4, 45)))
plt.suptitle('Gompertz Model 3D Conf. Region')
Out[16]:
In [17]:
wb2_glm = lms.s_models.model(wb2_model)
In [18]:
wb2_glm.fit_partial_model('Diet*Sex', [0])
Out[18]:
In [19]:
wb2_glm.fit_partial_model('Diet*Sex', [1])
Out[19]:
> S1 <- survreg(Surv(Time0) ~ Diet*Sex, dist='weibull', data=df)
> summary(S1)
Call:
survreg(formula = Surv(Time0) ~ Diet * Sex, data = df, dist = "weibull")
Value Std. Error z p
(Intercept) 6.7515 0.0827 81.680 0.00e+00
DietII 0.0776 0.1156 0.671 5.02e-01
DietIII 0.0462 0.1211 0.382 7.03e-01
SexMale -0.4243 0.1323 -3.208 1.34e-03
DietII:SexMale 0.4459 0.1885 2.366 1.80e-02
DietIII:SexMale 0.4725 0.1881 2.512 1.20e-02
Log(scale) -0.9506 0.0834 -11.393 4.56e-30
Scale= 0.387
Weibull distribution
Loglik(model)= -764.8 Loglik(intercept only)= -771.7
Chisq= 13.89 on 5 degrees of freedom, p= 0.016
Number of Newton-Raphson Iterations: 7
n= 106
In [20]:
wb2_glm.fit_partial_model('Diet*Sex', [])
Out[20]:
mortdist.f90
subroutine lg3LLK(sx1, sx2, sy, su, ss, sc, n)
double precision, intent(in) :: sx1(n), sx2(n)
double precision, intent(out) :: sy
double precision :: su, ss, sc
integer n, i
real(kind=10) :: x1(n), x2(n)
real(kind=10) :: y
real(kind=10) :: v1, v2, u, s, c
x1= sx1
x2= sx2
u = su
s = ss
c = sc
y = 0.0_10
do 100 i=1, n
v1 = exp((u-x1(i))/s)
if (x1(i) == x2(i)) then
y = y+log(v1)-log(1.0_10+v1) &
-c*x1(i) &
+log(1.0_10/s/(1.0_10+v1)+c)
else
v2 = exp((u-x2(i))/s)
y = y+log(v1/(1.0_10+v1)/exp(c*x1(i)) &
-v2/(1.0_10+v2)/exp(c*x2(i)))
end if
100 continue
sy = dble(y)
end subroutine lg3LLK
Getting CDF, PDF, SF, and H from one call to avoid calculate the same thing more than once.
mortdist_cpsh.f90
subroutine gp2_cpsh(p, x, cdf, pdf, sf, hzd, m, n)
double precision, intent(in) :: p(m), x(n)
double precision, intent(out):: cdf(n), pdf(n), sf(n), hzd(n)
double precision :: a, b
double precision :: ebx(n)
integer :: m, n
a = p(1)
b = p(2)
ebx = exp(b*x)
hzd = a*ebx
sf = exp(-a/b*(ebx-1.0d0))
pdf = hzd*sf
cdf = 1.0d0-sf
end subroutine gp2_cpsh