In [1]:
# Load the Ebola country time series. The first CSV column becomes the index and
# pandas attempts to parse it as dates.
# `pd.read_csv` is used because `DataFrame.from_csv` no longer exists in
# pandas >= 1.0; `index_col=0, parse_dates=True` reproduce its old defaults.
df = pd.read_csv(
    'https://raw.githubusercontent.com/cmrivers/ebola/master/country_timeseries.csv',
    index_col=0,
    parse_dates=True
)
# Sort by index first so the forward fill below takes the closest earlier row.
df = df.sort_index()
# `.ffill()` is the non-deprecated spelling of `fillna(method='ffill')`:
# each missing cell takes the last non-missing value above it in the same column.
df = df.ffill()
In [2]:
# Area chart of every column whose name contains "cases" (case-insensitive match).
titles = [k for k in df.columns if 'cases' in k.lower()]
# Keep the Axes that pandas returns and ask *it* for the legend. A bare
# `legend()` only resolves when pylab's names have been star-imported into the
# notebook namespace, which makes the cell fail on a plain kernel.
ax = df.plot(y=titles, kind='area')
ax.legend()
Out[2]:
In [3]:
# Area chart of every column whose name contains "Deaths" (case-sensitive match).
# `titles` stays a notebook-level name because code further down may read it.
titles = [k for k in df.columns if 'Deaths' in k]
# Keep the Axes that pandas returns and ask *it* for the legend. A bare
# `legend()` only resolves when pylab's names have been star-imported into the
# notebook namespace, which makes the cell fail on a plain kernel.
ax = df.plot(y=titles, kind='area')
ax.legend()
Out[3]:
In [5]:
# Add a 'total deaths' column (row-wise sum of the death columns) and plot it.
#
# The death columns are selected here rather than read from the shared `titles`
# variable: `titles` is assigned by more than one earlier cell (once with the
# "cases" columns, once with the "Deaths" columns), so its content depends on
# which of those cells ran last. The match is case-sensitive, so the lowercase
# 'total deaths' column created below is not picked up when this cell is re-run.
death_columns = [k for k in df.columns if 'Deaths' in k]
df['total deaths'] = df[death_columns].sum(axis=1)
ax = df.plot(y='total deaths',
             title='Total Deaths in \n 2014 Ebola Outbreak')
# Label through the Axes object instead of a bare pylab-style `ylabel()` call,
# which needs pylab's names star-imported into the namespace.
ax.set_ylabel('Total Deaths')
Out[5]:
In [6]:
import numpy as np
import seaborn as sn

# Base-10 logarithm of the 'total deaths' column, stored as a new column.
# Non-positive totals are masked to NaN first so they do not turn into -inf
# (log10(0)), which would poison the regression line fitted below.
total_deaths = df['total deaths']
df['log total deaths'] = np.log10(total_deaths.where(total_deaths > 0))
# Scatter plot with a fitted regression line: Day on x, log deaths on y.
# Keyword arguments are required: seaborn >= 0.12 rejects the old positional
# (x, y, data) calling form. Keywords also work on older releases.
sn.lmplot(x='Day', y='log total deaths', data=df)
Out[6]:
In [7]:
import statsmodels.formula.api as sm

# Ordinary least squares of 'log total deaths' (response) on 'Day' (predictor),
# i.e. the same relationship the preceding lmplot draws (Day on x, log deaths on y).
#
# Why the formula interface:
#   * recent statsmodels releases no longer expose the uppercase `OLS` class from
#     `statsmodels.formula.api`; the lowercase `ols` is the supported entry point;
#   * the previous call passed `Day` as the response and the log deaths as the only
#     regressor with no constant term, so it fitted the inverse relationship
#     through the origin rather than the line shown in the plot;
#   * a formula adds the intercept automatically.
# Q("...") quotes the column name because it contains spaces.
ols = sm.ols('Q("log total deaths") ~ Day', data=df)
ols.fit().summary()
Out[7]:
In [ ]: