In [46]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Apply the seaborn theme first: sns.set() rewrites matplotlib rcParams, so
# notebook-specific overrides are applied afterwards to guarantee the theme
# cannot reset them.
sns.set()
plt.rcParams["figure.figsize"] = (15, 7)  # default figure size for every plot below
In [2]:
import pandas as pd
from lifelines.datasets import load_dd
# Load the example dataset shipped with lifelines. Later cells read its
# "duration", "observed", "democracy" and "regime" columns.
data = load_dd()
In [50]:
# Preview the first rows of the dataset.
data.head()
Out[50]:
In [31]:
from lifelines import KaplanMeierFitter

# General-purpose fitter used by the single-population cells and by the
# per-regime loop further down. Those cells reference `kmf`, but it was never
# created, so running the notebook on a fresh kernel raised NameError.
kmf = KaplanMeierFitter()

# Dedicated fitters for the democratic / non-democratic comparison.
kmf1 = KaplanMeierFitter()
kmf2 = KaplanMeierFitter()
In [ ]:
# T: the "duration" column, E: the "observed" column. Both are reused by the
# fits and the log-rank test below.
T, E = data["duration"], data["observed"]
In [9]:
# Fit the estimator on every row, passing E as the event-observed flags.
kmf.fit(T, event_observed=E)
Out[9]:
In [22]:
# Plot the fitted survival function and title the axes it was drawn on.
survival_ax = kmf.survival_function_.plot()
survival_ax.set_title('Survival function of political regimes');
In [23]:
# Same fit, drawn through the fitter's own plot method.
kmf.plot()
Out[23]:
In [24]:
# Median attribute of the fit performed above.
# NOTE(review): newer lifelines releases appear to expose this under a
# different attribute name - confirm against the installed version.
kmf.median_
Out[24]:
In [49]:
# One shared axes so both groups are drawn on the same plot.
ax = plt.subplot(111)

# Boolean mask selecting the rows labelled "Democracy".
dem = (data["democracy"] == "Democracy")

# fit() hands back the fitter, so each curve is fitted and drawn in one chain.
kmf1.fit(T[dem], event_observed=E[dem], label="Democratic Regimes").plot(ax=ax)
kmf2.fit(T[~dem], event_observed=E[~dem], label="Non-democratic Regimes").plot(ax=ax)

ax.set_ylim(0, 1)
ax.set_title("Lifespans of different global regimes");
In [35]:
# Plot kmf2 minus kmf1, i.e. the non-democratic fit minus the democratic fit
# from the comparison cell above.
kmf2.subtract(kmf1).plot()
Out[35]:
In [26]:
from lifelines.statistics import logrank_test
# Log-rank test between the democratic (dem) and non-democratic (~dem) rows,
# using the same durations and event flags as the Kaplan-Meier fits.
# NOTE(review): how `alpha` is interpreted (and whether it is still honoured)
# depends on the installed lifelines version - confirm.
results = logrank_test(T[dem], T[~dem], E[dem], E[~dem], alpha=.99)
results.print_summary()
In [27]:
# Distinct regime labels; one small panel is drawn per label.
regime_types = data['regime'].unique()

for panel, regime_type in enumerate(regime_types, start=1):
    # Panels fill a 2x3 grid in enumeration order.
    ax = plt.subplot(2, 3, panel)

    # Refit the shared fitter on this regime's rows only.
    in_regime = data['regime'] == regime_type
    kmf.fit(T[in_regime], E[in_regime], label=regime_type)
    kmf.plot(ax=ax, legend=False)

    ax.set_title(regime_type)
    ax.set_xlim(0, 50)

    # Only the first panel carries the y-axis label.
    if panel == 1:
        ax.set_ylabel('Frac. in power after $n$ years')

plt.tight_layout()
In [36]:
from lifelines.datasets import load_rossi
from lifelines import CoxPHFitter

# Fit a Cox proportional-hazards model on the Rossi dataset, using 'week' as
# the duration column and 'arrest' as the event column.
rossi_dataset = load_rossi()
cph = CoxPHFitter().fit(rossi_dataset, duration_col='week', event_col='arrest')
cph.print_summary()  # access the results using cph.summary
In [38]:
# Covariates only: the duration and event columns are removed before predicting.
X = rossi_dataset.drop(columns=["week", "arrest"])

# Both predictions are computed, but the notebook renders only the value of
# the final expression in the cell.
cph.predict_partial_hazard(X)
cph.predict_survival_function(X)
Out[38]:
In [39]:
# Refit the Cox model on the Rossi data, then plot it for several values of
# the 'prio' covariate.
prio_values = [0, 5, 10, 15]
cph = CoxPHFitter().fit(rossi_dataset, duration_col='week', event_col='arrest')
cph.plot_covariate_groups('prio', prio_values)
Out[39]:
In [40]:
from lifelines.datasets import load_dd
from lifelines import KaplanMeierFitter

# Reload the dataset and split it by the value of the "democracy" column.
data = load_dd()
democracy_0 = data[data['democracy'] == 'Non-democracy']
democracy_1 = data[data['democracy'] == 'Democracy']

# One fitter per group. Note that `kmf1` is rebound here and replaces the
# fitter of the same name created earlier in the notebook.
kmf0 = KaplanMeierFitter().fit(democracy_0['duration'], event_observed=democracy_0['observed'])
kmf1 = KaplanMeierFitter().fit(democracy_1['duration'], event_observed=democracy_1['observed'])

# Draw both log-log curves on the same axes, non-democracy first so the
# legend entries line up with the curves.
fig, axes = plt.subplots()
for fitter in (kmf0, kmf1):
    fitter.plot_loglogs(ax=axes)
axes.legend(['Non-democracy', 'Democracy'])
plt.show()
In [ ]:
def cohort_period(df):
    """Add a ``cohort_period`` column copied from the ``loan_funded`` index level.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has a level named ``loan_funded``.

    Returns
    -------
    pandas.DataFrame
        The same frame object, modified in place, with one ``cohort_period``
        value per row taken from that row's ``loan_funded`` index value.
    """
    df['cohort_period'] = list(df.index.get_level_values('loan_funded'))
    return df
# `mdates` is used for the month locator below but was never imported.
import matplotlib.dates as mdates

# NOTE(review): `df_or`, `min_date` and `max_date` are not defined anywhere in
# the visible notebook - they must be created before this cell can run.

# Step 1 - build the cohort table. This aggregation originally sat *below* the
# reshaping code that consumes `cohorts`, so on a fresh kernel the cell failed
# with NameError (and any reshaping was overwritten). It now runs first.
cohorts = (
    df_or.groupby([df_or['loan_created'],
                   df_or['loan_funded'].fillna('None')])
    .agg({'loan_funded_bool': 'sum'})
    .groupby(level=[0]).cumsum()
)

# Step 2 - add the cohort_period column per loan_created group and re-index.
# group_keys=False stops current pandas from prepending the group key to the
# index, which would otherwise make reset_index() try to insert
# 'loan_created' twice.
cohorts = cohorts.groupby(level=0, group_keys=False).apply(cohort_period)
cohorts = cohorts.reset_index()
# Plain column assignment replaces the column, so it takes the datetime dtype;
# `.loc[:, col] = ...` can write into the existing object column instead.
cohorts['cohort_period'] = pd.to_datetime(cohorts['cohort_period'])
cohorts = cohorts.set_index(['loan_created', 'cohort_period'])

# create a Series holding the total size of each cohort group
# NOTE(review): this sums values that were already cumulated in step 1 -
# confirm this is the intended denominator.
cohort_group_size = cohorts['loan_funded_bool'].groupby(level=0).sum()

# One column per loan_created cohort, one row per cohort_period.
conversion_rate = (
    cohorts['loan_funded_bool'].unstack(0)
    .divide(cohort_group_size, axis=1)
)
# Drop the first row of the table.
conversion_rate = conversion_rate.drop(conversion_rate.index[0])
# conversion_rate.fillna(method='backfill', inplace=True)
# conversion_rate.fillna(0, inplace=True)

# Step 3 - plot the conversion rate with one major tick per month.
ax = conversion_rate.plot(figsize=(15, 5), x_compat=True)
ax.xaxis.set_major_locator(mdates.MonthLocator())
plt.tick_params(axis='both', which='major', labelsize=13)
ax.set_xlim([min_date, max_date])
plt.legend(bbox_to_anchor=(1.04,1))
plt.show()