In [1]:
%pylab inline
from scipy import stats
from lifelines.estimation import KaplanMeierFitter
from lifelines.statistics import logrank_test
from lifelines import CoxPHFitter


Populating the interactive namespace from numpy and matplotlib

In [2]:
t1 = stats.expon(loc = 0, scale = 3.5).rvs(50)
t2 = stats.expon(loc = 0, scale = 6.5).rvs(60)
data = pd.DataFrame({'time': concatenate((t1,t2)),
                     'group': concatenate((ones(t1.shape),zeros(t2.shape))),
                     'event': concatenate((ones(t1.shape),ones(t2.shape)))})

print data.head()

kmf1 = KaplanMeierFitter()
kmf1.fit(data.time[data.group==1])
print 't1 median:',kmf1.median_

kmf2 = KaplanMeierFitter()
kmf2.fit(data.time[data.group==0])
print 't2 median:',kmf2.median_

summary, pvalue, test_results = logrank_test(data.time[data.group==1], data.time[data.group==0])
print summary


   event  group      time
0      1      1  6.962576
1      1      1  1.044209
2      1      1  2.609977
3      1      1  0.177211
4      1      1  1.131114
t1 median: 2.24020927132
t2 median: 5.05074378037
Results
   df: 1
   alpha: 0.95
   t 0: -1
   test: logrank
   null distribution: chi squared

   __ p-value ___|__ test statistic __|__ test results __
         0.00000 |              24.511 |     True   
Results
   df: 1
   alpha: 0.95
   t 0: -1
   test: logrank
   null distribution: chi squared

   __ p-value ___|__ test statistic __|__ test results __
         0.00000 |              24.511 |     True   

In [3]:
figure(1,figsize=(12,5))
fill_between(x = array(kmf1.confidence_interval_.index,dtype=float),
             y2 = kmf1.confidence_interval_['KM-estimate_upper_0.95'].values,
             y1 = kmf1.confidence_interval_['KM-estimate_lower_0.95'].values,
             color = 'tomato', alpha=0.3)
fill_between(x = array(kmf2.confidence_interval_.index,dtype=float),
             y2 = kmf2.confidence_interval_['KM-estimate_upper_0.95'],
             y1 = kmf2.confidence_interval_['KM-estimate_lower_0.95'],
             color = 'slateblue', alpha=0.3)

plot(kmf1.survival_function_.index,kmf1.survival_function_['KM-estimate'],label='lambda = 3.0',color='tomato',lw=2)
plot(kmf2.survival_function_.index,kmf2.survival_function_['KM-estimate'],label='lambda = 3.5',color='slateblue',lw=2)
legend()

annotate(xy = (0.5,1), s='logrank p = %1.2g' % pvalue, xycoords = 'axes fraction',size ='x-large', ha = 'center',va='top')


Out[3]:
<matplotlib.text.Annotation at 0x945d5f8>

In [4]:
cf = CoxPHFitter(alpha=0.95, tie_method='Efron')
cf.fit(data, duration_col='time', event_col='event')

cf.summary()


           coef  exp(coef)  se(coef)        z         p  lower 0.95  upper 0.95
group  1.024943   2.786938  0.214941  4.76849  0.000002    0.603574    1.446313
C:\Anaconda\lib\site-packages\lifelines\estimation.py:1106: FutureWarning: in the future, boolean array-likes will be handled as a boolean array index
  ind_hazards[self.durations <= t].sum())

In [ ]: