In [2]:
import lifelines
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [3]:
%matplotlib inline
plt.style.available
plt.style.use("bmh")
In [4]:
# Load the survival table; the path is resolved relative to the notebook's working directory.
cns_all = pd.read_csv(filepath_or_buffer="survival.csv")
In [5]:
# Remove the "Unnamed: 0" column (presumably an index written out by an earlier to_csv).
# Using drop(..., errors="ignore") instead of `del` keeps this cell safe to re-run:
# `del` raises KeyError once the column is already gone.
cns_all = cns_all.drop("Unnamed: 0", axis=1, errors="ignore")
In [6]:
# Event indicator: True only where the status is exactly "DEAD"; every other value
# (including a missing one) is False, i.e. treated as censored.
C = cns_all["Status (Dead or alive)"] == "DEAD"
In [7]:
# Follow-up duration per patient, measured from diagnosis to death or last follow-up.
T = cns_all.loc[:, "Duration from diagnosis to death or last follow up"]
In [8]:
# Single Kaplan-Meier estimator instance; it is re-fitted for every subgroup below.
kmf = lifelines.KaplanMeierFitter()
In [9]:
# Fit on the whole cohort: T are the durations, C marks which of them ended in death.
# fit() hands back the fitter, so naming it and echoing it shows the same cell output.
overall_fit = kmf.fit(T, event_observed=C)
overall_fit
Out[9]:
In [10]:
# Plot the fitted estimate for the whole cohort.
overall_figsize = (12, 8)
kmf.plot(figsize=overall_figsize)
Out[10]:
In [11]:
# Plot the fitted survival function itself (a DataFrame) and title the resulting axes.
survival_ax = kmf.survival_function_.plot(figsize=(12, 8))
survival_ax.set_title('Survival function of CNS');
In [12]:
# Age -> 1 when strictly above 60, otherwise 0 (a missing age also becomes 0).
cns_all["Age"] = cns_all["Age"].gt(60).astype(int)

# Race -> keep "White", collapse everything else (including missing) to "Other".
race_raw = cns_all["Race"]
cns_all["Race"] = race_raw.where(race_raw == "White", "Other")
def stage_mask(stage):
    """Collapse a stage label into one of three groups.

    "I" and "II" map to "I/II", "III" and "IV" map to "III/IV"; any other
    value (including a missing one) maps to "Missing".
    """
    if stage in ("I", "II"):
        return "I/II"
    if stage in ("III", "IV"):
        return "III/IV"
    return "Missing"
def LDH_mask(ldh):
    """Turn a "YES"/"NO" string into 1/0.

    Returns 1 for "YES", 0 for "NO" and None for anything else. Despite the
    name, the notebook also applies it to other YES/NO columns.
    """
    if ldh == "YES":
        return 1
    return 0 if ldh == "NO" else None
# Stage -> "I/II", "III/IV" or "Missing" via stage_mask.
cns_all["Stage"] = cns_all["Stage"].map(stage_mask)

# IPI Score -> "L/LI" when below 3, otherwise "H/HI".
# NOTE(review): a missing score fails the `< 3` comparison and therefore lands in "H/HI".
cns_all["IPI Score"] = cns_all["IPI Score"].lt(3).map({True: "L/LI", False: "H/HI"})

# PS -> 1 when strictly above 1, otherwise 0 (a missing value also becomes 0).
cns_all["PS"] = cns_all["PS"].gt(1).astype(int)
In [13]:
# LDH -> 1 / 0 / missing via LDH_mask.
cns_all["LDH"] = cns_all["LDH"].map(LDH_mask)

# Each B-symptom column -> 0 when the value is exactly "NO", otherwise 1.
# NOTE(review): anything other than "NO" — including a missing value — is coded as 1.
for b_symptom_col in ("B Symp 1", "B symp 2", "B symp 3"):
    cns_all[b_symptom_col] = cns_all[b_symptom_col].ne("NO").astype(int)
In [14]:
# Count how many of the three B-symptom flags are set for each patient ...
b_symp_any = cns_all[["B Symp 1", "B symp 2", "B symp 3"]].sum(axis=1)
# ... and reduce that count to a single "any B symptom" 0/1 flag.
cns_all["B Symp"] = b_symp_any.gt(0).astype(int)
In [15]:
# The individual B-symptom columns are superseded by the combined "B Symp" flag.
for obsolete_col in ("B Symp 1", "B symp 2", "B symp 3"):
    del cns_all[obsolete_col]
In [16]:
# ">1 extranodal" -> 0 when exactly "NO", otherwise 1.
cns_all[">1 extranodal"] = cns_all[">1 extranodal"].ne("NO").astype(int)

# Bone-marrow / peripheral-blood involvement -> 1 / 0 / missing via LDH_mask.
for involvement_col in ("BM Involv", "PB Involv"):
    cns_all[involvement_col] = cns_all[involvement_col].map(LDH_mask)

# Replace the "CNS " column (note the trailing space) with a clean 0/1 "cns" column;
# only an explicit "YES" counts as 1, missing or other values become 0.
cns = cns_all["CNS "].map(LDH_mask)
del cns_all["CNS "]
cns_all["cns"] = cns.eq(1).astype(int)
cns_all
Out[16]:
In [17]:
# Store the event flag and duration under short column names ...
for short_name, values in (("Observed", C), ("Duration", T)):
    cns_all[short_name] = values

# ... and drop the verbose source columns they were derived from.
for verbose_name in (
    "Status (Dead or alive)",
    "Duration from diagnosis to death or last follow up",
):
    del cns_all[verbose_name]
In [18]:
# Kaplan-Meier curves for patients with vs. without CNS involvement on shared axes.
#
# `font` was passed to xlabel/ylabel below but is never defined anywhere in this
# notebook, so the cell raised NameError on a fresh kernel. It is defined here.
# NOTE(review): the original styling is unknown — this size is a placeholder.
font = {"fontsize": 14}

ax = plt.subplot(111)
cns_pos = (cns_all["cns"] == 1)  # boolean mask over the full cohort

kmf.fit(T[cns_pos], event_observed=C[cns_pos], label="CNS Positive")
kmf.plot(ax=ax, figsize=(14, 9), flat=False, show_censors=True, ci_show=False)
print(kmf.median_)

kmf.fit(T[~cns_pos], event_observed=C[~cns_pos], label="CNS Negative")
kmf.plot(ax=ax, figsize=(14, 9), show_censors=True, ci_show=False)
print(kmf.median_)

plt.ylim(0,1);
plt.xlabel("Time (months)", **font)
plt.ylabel("Overall survival probability", **font);
In [19]:
# Re-fit on the CNS-positive patients only and show that group's median survival time.
kmf.fit(T[cns_pos], event_observed=C[cns_pos], label="CNS Positive")
kmf.median_
Out[19]:
In [20]:
# Log-rank test comparing survival of CNS-positive and CNS-negative patients.
# logrank_test is imported with the other dependencies in the notebook's first cell
# rather than in the middle of the analysis.
logrank_test(
    T[cns_pos], T[~cns_pos],
    C[cns_pos], C[~cns_pos],
    alpha=.99
)
Out[20]:
In [41]:
# One Kaplan-Meier panel per Race category, laid out on a 2x3 subplot grid.
categories = cns_all['Race'].unique()
for panel_no, category in enumerate(categories, start=1):
    ax = plt.subplot(2, 3, panel_no)
    ix = cns_all['Race'] == category  # rows belonging to this category
    kmf.fit(T[ix], C[ix], label=category)
    kmf.plot(ax=ax, legend=False, figsize=(15, 10))
    ax.set_title(category)
    ax.set_xlim(0, 50)
plt.tight_layout()
In [63]:
def PB_mask(pb):
    """Turn a 0/1 flag into a display label.

    Returns "No" for 0, "Yes" for 1 and "Missing" for anything else.
    """
    if pb == 0:
        return "No"
    return "Yes" if pb == 1 else "Missing"
In [65]:
# Relabel the 0/1/missing "PB Involv" flag as "No"/"Yes"/"Missing" for plotting.
# NOTE: not idempotent — on a second run the labels no longer equal 0 or 1, so every
# row would become "Missing".
cns_all["PB Involv"] = cns_all["PB Involv"].map(PB_mask)
In [98]:
# One Kaplan-Meier panel per "PB Involv" label, laid out on a 2x3 subplot grid.
categories = cns_all['PB Involv'].unique()
for panel_no, category in enumerate(categories, start=1):
    ax = plt.subplot(2, 3, panel_no)
    ix = cns_all['PB Involv'] == category  # rows belonging to this label
    kmf.fit(T[ix], C[ix], label=category)
    kmf.plot(ax=ax, legend=False, figsize=(15, 10))
    ax.set_title(category)
    ax.set_xlim(0, 50)
plt.tight_layout()
In [125]:
# Keep only the CNS-positive patients and renumber the rows from 0.
# reset_index() without drop=True retains the old row labels in a new "index" column.
# Note that `cns_pos` is now a DataFrame, no longer the boolean mask used earlier.
cns_pos = cns_all.loc[cns_all['cns'] == 1].reset_index()
In [126]:
# One Kaplan-Meier panel per Gender, restricted to the CNS-positive patients.
#
# The mask `ix` is built on `cns_pos`, whose rows were renumbered by reset_index(),
# so it must be applied to cns_pos' own "Duration"/"Observed" columns. Indexing the
# full-cohort T / C with it (as before) picks unrelated patients or fails to align.
column = 'Gender'
categories = cns_pos[column].unique()
for i, category in enumerate(categories):
    ax = plt.subplot(2, 3, i + 1)
    ix = cns_pos[column] == category
    kmf.fit(cns_pos.loc[ix, "Duration"], cns_pos.loc[ix, "Observed"], label=category)
    kmf.plot(ax=ax, legend=False, figsize=(15, 10))
    plt.title(category)
    plt.xlim(0, 50)
plt.tight_layout()
In [118]:
# Log-rank comparison of CNS-positive vs. CNS-negative survival.
#
# The boolean mask is rebuilt locally: in notebook order `cns_pos` has been rebound to
# a DataFrame by now, so it can no longer be used (or negated) as a row mask for T / C.
is_cns_positive = cns_all["cns"] == 1
summary, p_value, test_results = logrank_test(
    T[is_cns_positive], T[~is_cns_positive],
    C[is_cns_positive], C[~is_cns_positive],
    alpha=.99
)
# NOTE(review): the 3-tuple unpacking follows the logrank_test return shape this
# notebook was written against — confirm it against the installed lifelines version.
# print(...) with parentheses works on both Python 2 and 3, unlike `print summary`.
print(summary)
In [128]:
# Summary statistics for the numeric columns of the CNS-positive subset.
cns_pos_summary = cns_pos.describe()
cns_pos_summary
Out[128]:
In [ ]: