In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
import numpy as np


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [2]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
# Initialise plotly's offline notebook mode before any figure is rendered.
# NOTE(review): connected=True presumably loads plotly.js from the CDN
# instead of embedding it in the notebook — confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected=True)


Load posterior-predicted survival (`pp_survival`) data from a random survival model.


In [3]:
# Read the example survival draws from the CSV that sits next to this notebook.
ppsurv = pd.read_csv(filepath_or_buffer='plotly_example_data.csv')

In [4]:
# Peek at the first five rows to check the columns that were loaded.
ppsurv.head(n=5)


Out[4]:
Unnamed: 0 iter model_cohort sex level_3 event_time survival
0 0 0 test model female 0 0.000000 1.000000
1 1 0 test model female 1 2.615961 1.000000
2 2 0 test model female 2 3.584694 0.977162
3 3 0 test model female 3 4.379338 0.952078
4 4 0 test model female 4 6.253546 0.945339

Plot posterior-predicted survival over time, by sex.


In [5]:
# Summarise the `survival` values at every (sex, event_time) pair:
# the median plus the bounds of the central 50% and 95% intervals.
#
# The aggregations are given as a list of (column_name, function) pairs
# rather than a {column_name: function} dict: dict-based renaming on a
# SeriesGroupBy was deprecated in pandas 0.20 and raises SpecificationError
# from pandas 1.0 onwards. The list form also fixes the output column order.
ppsummary = ppsurv.groupby(['sex', 'event_time'])['survival'].agg([
    ('95_lower', lambda draws: np.percentile(draws, 2.5)),
    ('95_upper', lambda draws: np.percentile(draws, 97.5)),
    ('50_lower', lambda draws: np.percentile(draws, 25)),
    ('50_upper', lambda draws: np.percentile(draws, 75)),
    ('median', lambda draws: np.percentile(draws, 50)),
]).reset_index()

In [6]:
# Show the last few summary rows for the female group.
ppsummary.loc[ppsummary['sex'].eq('female')].tail()


Out[6]:
sex event_time median 50_lower 95_lower 95_upper 50_upper
72 female 18.543842 0.338076 0.283789 0.182441 0.519674 0.405841
73 female 18.656898 0.331524 0.275271 0.176058 0.507488 0.398727
74 female 18.932325 0.320479 0.269188 0.176320 0.492980 0.384687
75 female 19.811832 0.306252 0.251404 0.157844 0.471677 0.367278
76 female 20.000000 0.284725 0.229034 0.134397 0.467671 0.344815

In [7]:
# Per-sex colours. The shade colours carry a `{}` placeholder that is filled
# in later with an alpha value via str.format; the line colours are opaque.
shade_colors = {
    'male': 'rgba(0, 128, 128, {})',
    'female': 'rgba(214, 12, 140, {})',
}
line_colors = {
    'male': 'rgb(0, 128, 128)',
    'female': 'rgb(214, 12, 140)',
}
# Order rows by group and then by time so each curve is drawn left to right.
ppsummary.sort_values(by=['sex', 'event_time'], inplace=True)

In [8]:
def _ci_band(x, upper, lower, fillcolor, name):
    """Build one filled interval band as a closed-polygon Scatter trace.

    The outline runs left to right along `upper` and then right to left
    along `lower`, so the enclosed area is the region between the two.

    x, upper, lower -- equal-length lists, ordered by increasing x.
    fillcolor       -- colour string used for the interior of the band.
    name            -- legend label for the band.
    """
    return go.Scatter(
        x=x + x[::-1],
        y=upper + lower[::-1],
        # 'toself' fills the closed outline itself; 'tozerox' fills towards
        # x=0 and only looks right when the curve happens to start at 0.
        fill='toself',
        fillcolor=fillcolor,
        # Invisible outline. Written as a plain dict with an rgba colour:
        # go.Line is deprecated, and 'transparent' is not among the colour
        # formats plotly documents for line.color.
        line=dict(color='rgba(255,255,255,0)'),
        showlegend=True,
        name=name,
    )


# For each sex, collect three traces: the median curve, the 50% band and the
# 95% band (the wider band gets the fainter fill).
data5 = []
for grp, grp_df in ppsummary.groupby('sex'):
    x = grp_df['event_time'].tolist()
    my_line = go.Scatter(
        x=x,
        y=grp_df['median'].tolist(),
        line=dict(color=line_colors[grp]),
        mode='lines',
        name=grp,
    )
    my_shading50 = _ci_band(
        x,
        grp_df['50_upper'].tolist(),
        grp_df['50_lower'].tolist(),
        fillcolor=shade_colors[grp].format(0.3),
        name='{} - 50% CI'.format(grp),
    )
    my_shading95 = _ci_band(
        x,
        grp_df['95_upper'].tolist(),
        grp_df['95_lower'].tolist(),
        fillcolor=shade_colors[grp].format(0.1),
        name='{} - 95% CI'.format(grp),
    )
    data5.extend([my_line, my_shading50, my_shading95])

In [9]:
# Axis settings for the survival figure.
layout5 = go.Layout(
    xaxis={'title': 'Days since enrollment'},
    yaxis={
        'title': 'Survival (%)',
        # 'zeroline': False,  (left disabled, as in the original)
        # d3-style format: show the fraction as a whole-number percentage.
        'tickformat': '.0%',
    },
)

In [10]:
# Combine the traces and the layout into one figure, then render it through
# the plotly.plotly interface under the given filename.
fig5 = go.Figure(data=data5, layout=layout5)
py.iplot(fig5, filename='survivalstan/posterior-predicted-values')


Out[10]:

In [ ]: