In [174]:
import lifelines
import matplotlib.pyplot as plt
from lifelines.datasets import load_rossi
from lifelines import CoxPHFitter
import pandas as pd
%matplotlib inline
from functools import reduce
from math import log, exp
import operator
In [3]:
rossi = load_rossi()
In [5]:
rossi.head()
Out[5]:
In [29]:
def run_filtered_cox_ph(df, time_col, event_col, covariate_names ):
df = df[covariate_names + [time_col, event_col]]
cf = lifelines.CoxPHFitter()
cf.fit(df, time_col, event_col = event_col, include_likelihood= True)
return cf
cf = run_cox_ph(rossi, "week", "arrest", ["paro", "prio"])
In [30]:
def present_cox_ph(cf):
fig, axes = plt.subplots(nrows=1, ncols=2, sharex=True)
cf.baseline_cumulative_hazard_.plot(ax = axes[0], title = "Baseline Cumulative Hazard")
cf.baseline_survival_.plot(ax = axes[1], title = "Baseline Survival")
present_cox_ph(cf)
In [217]:
class CoxPHModel:
def __init__(self, df, survival_col, cens_col, prior_params, reference_loglik = None, covariate_names = None):
self.prior_params = prior_params
self.survival_col = survival_col
self.cens_col = cens_col
all_covariate_columns = [col for col in df.columns if col not in [cens_col, survival_col]]
if covariate_names == None:
self.covariate_names = all_covariate_columns
else:
self.covariate_names = covariate_names
self.df = df[self.covariate_names + [self.survival_col, self.cens_col]]
self.mask = [x in self.covariate_names for x in all_covariate_columns]
self._cf = None
if reference_loglik == None:
reference_loglik = self.loglik()
self.reference_loglik = reference_loglik
def prior(self):
parameter_contributions = [x[1] if x[0] else (1 - x[1]) for x in zip(self.mask , self.prior_params)]
return reduce(operator.mul, parameter_contributions, 1)
def _run(self):
self._cf = lifelines.CoxPHFitter()
self._cf.fit(self.df, self.survival_col, event_col = self.cens_col, include_likelihood= True)
def loglik(self):
if self._cf is None:
self._run()
return self._cf._log_likelihood
def summary(self):
if self._cf is None:
self._run()
return self._cf.summary.index, self._cf.summary["coef"], (self._cf.summary["se(coef)"] * self._cf.summary["se(coef)"])
def bayesian_information_critera(self):
size = len(self.covariate_names)
n = self.df.shape[0]
prior = self.prior()
loglik = self.loglik()
return (size * log(n)) - (2 * (loglik - self.reference_loglik)) - (2 * log(prior))
In [278]:
class BMACox:
def __init__(self, x, survival_col, cens_col, priors = None):
self.df = x
self.survival_col = survival_col
self.cens_col = cens_col
if priors == None:
self.priors = [0.5] * (len(self.df.columns) - 2) #Uniformative prior
else:
self.priors = priors
self.reference_loglik = None
self.full_model = self.create_model(None)
self._set_reference_loglik()
def _generate_model_definnitions(self):
names, coefs, var = self.full_model.summary()
model1 = ["fin", "prio"]
model2 = ["race", "mar"]
model6 = ["race", "age"]
model3 = ["prio", "race"]
model4 = ["prio", "race", "mar"]
model5 = ["prio", "age", "mar"]
return [model1, model2, model3, model4, model5, model6]
def _weight_by_posterior(self, values, posterior):
def add_dataframes(dfone, dftwo):
return dfone.add(dftwo, fill_value = 0)
output = zip(values, posterior)
weighted = [x[0] * x[1] for x in output]
running_total = weighted[0]
for i in range(1, len(weighted)):
running_total = add_dataframes(running_total, weighted[i])
return running_total
def run(self):
models = self._generate_model_definnitions()
models = [self.create_model(x) for x in models]
bics = [x.bayesian_information_critera() for x in models]
self.posterior_probabilities = []
min_bic = min(bics)
summation = sum( [exp(-0.5 * (bic - min_bic)) for bic in bics])
for bic in bics:
posterior = (exp(-0.5 * (bic - min_bic)))/summation
self.posterior_probabilities.append(posterior)
coefficiencts_by_model = [x.summary()[1] for x in models]
sterr_by_model = [x.summary()[2] for x in models]
self.coefficients_weighted = self._weight_by_posterior(coefficiencts_by_model, self.posterior_probabilities)
self.sterr_weighted = self._weight_by_posterior(sterr_by_model, self.posterior_probabilities)
return self.coefficients_weighted, self.sterr_weighted
def create_model(self, covariate_names):
return CoxPHModel(self.df, self.survival_col, self.cens_col, self.priors, self.reference_loglik, covariate_names)
def _set_reference_loglik(self):
self.reference_loglik = self.full_model.loglik()
In [279]:
bmaCox = BMACox(rossi, "week", "arrest")
posterior = bmaCox.run()
posterior[0]
Out[279]:
In [226]:
for x in posterior:
print(x)
In [257]:
weighted = [x[0] * x[1] for x in p]
weighted
Out[257]:
In [260]:
def add_dataframes(dfone, dftwo):
return dfone.add(dftwo, fill_value = 0)
running_total = weighted[0]
for i in range(1, len(weighted)):
running_total = add_dataframes(running_total, weighted[i])
running_total
Out[260]:
In [244]:
pd.DataFrame([0], columns = ["coef"])
Out[244]:
In [ ]: