This notebook demonstrates mediation analysis when the mediator and outcome are duration variables, modeled using proportional hazards regression. These examples are based on simulated data.
In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation
Make the notebook reproducible.
In [2]:
np.random.seed(3424)
Specify a sample size.
In [3]:
n = 1000
Generate an exposure variable.
In [4]:
exp = np.random.normal(size=n)
Generate a mediator variable.
In [5]:
def gen_mediator():
mn = np.exp(exp)
mtime0 = -mn * np.log(np.random.uniform(size=n))
ctime = -2 * mn * np.log(np.random.uniform(size=n))
mstatus = (ctime >= mtime0).astype(np.int)
mtime = np.where(mtime0 <= ctime, mtime0, ctime)
return mtime0, mtime, mstatus
Generate an outcome variable.
In [6]:
def gen_outcome(otype, mtime0):
if otype == "full":
lp = 0.5*mtime0
elif otype == "no":
lp = exp
else:
lp = exp + mtime0
mn = np.exp(-lp)
ytime0 = -mn * np.log(np.random.uniform(size=n))
ctime = -2 * mn * np.log(np.random.uniform(size=n))
ystatus = (ctime >= ytime0).astype(np.int)
ytime = np.where(ytime0 <= ctime, ytime0, ctime)
return ytime, ystatus
Build a dataframe containing all the relevant variables.
In [7]:
def build_df(ytime, ystatus, mtime0, mtime, mstatus):
df = pd.DataFrame({"ytime": ytime, "ystatus": ystatus,
"mtime": mtime, "mstatus": mstatus,
"exp": exp})
return df
Run the full simulation and analysis, under a particular population structure of mediation.
In [8]:
def run(otype):
mtime0, mtime, mstatus = gen_mediator()
ytime, ystatus = gen_outcome(otype, mtime0)
df = build_df(ytime, ystatus, mtime0, mtime, mstatus)
outcome_model = sm.PHReg.from_formula("ytime ~ exp + mtime", status="ystatus", data=df)
mediator_model = sm.PHReg.from_formula("mtime ~ exp", status="mstatus", data=df)
med = Mediation(outcome_model, mediator_model, "exp", "mtime",
outcome_predict_kwargs={"pred_only": True})
med_result = med.fit(n_rep=20)
print(med_result.summary())
Run the example with full mediation
In [9]:
run("full")
Run the example with partial mediation
In [10]:
run("partial")
Run the example with no mediation
In [11]:
run("no")