In [2]:
# coding:utf8
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.font_manager as fm
import numpy as np
%matplotlib inline
In this example, we show the current year's incidence up to a given week.
Along with the current incidence, we present the following intensity thresholds:
Low activity threshold: estimated epidemic threshold based on historical levels. Minimum: incidence equivalent to 5 cases.
High activity threshold: incidence considered high based on historical levels. Minimum: incidence equivalent to 10 cases.
Very high activity threshold: incidence considered very high based on historical levels. Minimum: incidence equivalent to 20 cases.
In [3]:
# Load the three CSV inputs used by the rest of the notebook (paths are relative to the notebook folder).
dfincidence = pd.read_csv('../data/current_estimated_values.csv', encoding='utf-8')
# dftypical supplies the 'corredor baixo/mediano/alto' columns drawn as shaded bands by plot_timeseries.
dftypical = pd.read_csv('../data/mem-typical.csv', encoding='utf-8')
# dfthresholds supplies the intensity threshold columns drawn as horizontal lines by plot_timeseries.
dfthresholds = pd.read_csv('../data/mem-report.csv', encoding='utf-8')
# Activity level column name -> Portuguese label shown on the figures.
level_dict = {'L0': 'Baixa', 'L1': 'Epidêmica',
'L2': 'Alta', 'L3': 'Muito alta'}
In [4]:
dfincidence.columns
Out[4]:
In [5]:
dfincidence.head(10)
Out[5]:
In [6]:
dftypical.head(10)
Out[6]:
In [7]:
dfthresholds.tail(10)
Out[7]:
Entries with dfthresholds['se típica do inicio do surto'] = NaN have activity too low for a proper epidemic threshold to be defined.
In [8]:
# Attach the location name ('Unidade da Federação') to every incidence row, joining on the UF code.
df = dfincidence.merge(dfthresholds[['UF', 'Unidade da Federação']], on='UF')
df.head(10)
Out[8]:
In [9]:
def plot_timeseries(df, dfthres, dftyp, week=None):
    """
    Plot the incidence time series of one location/season on top of its
    typical activity corridors and intensity thresholds.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows for a single location and season. Columns read here: 'epiweek',
        'SRAG', '50%', '2.5%', '97.5%', 'Situation', 'Unidade da Federação'
        and the level columns named by the keys of the notebook-level
        ``level_dict``.
    dfthres : pandas.DataFrame
        Threshold table for the same location with columns
        'limiar pré-epidêmico', 'intensidade alta' and
        'intensidade muito alta'. The first row is used for the lines.
    dftyp : pandas.DataFrame
        Typical curve for the same location with columns 'epiweek',
        'corredor baixo', 'corredor mediano' and 'corredor alto'.
    week : int, optional
        Week highlighted by the vertical line and used to pick the activity
        level text. When omitted, the notebook-level variable ``week`` is
        used, which is how earlier callers supplied it.

    Returns
    -------
    None
        The figure is drawn as a side effect.

    Raises
    ------
    IndexError
        If ``df`` has no row for ``week``.
    """
    import warnings  # local import: not part of the notebook's import cell

    if week is None:
        # Backward compatibility with callers that set a notebook-level `week`.
        week = globals()['week']

    def _oswald(size):
        # Keep the single-argument constructor so the font lookup is unchanged.
        props = fm.FontProperties('Oswald')
        props.set_size(size)
        return props

    # Set font properties
    fontproplgd = _oswald(28)
    fontproplbl = _oswald(42)
    fontpropticks = _oswald(24)

    # Set figure size
    fig, ax = plt.subplots(1, 1, figsize=[20, 20])

    # Set ymax at least = 1. nanmax keeps a missing corridor/threshold value
    # from turning the axis limit into NaN.
    max_typ = dftyp['corredor alto'].max()
    max_thres = dfthres['intensidade muito alta'].max()
    max_ts = df[['SRAG', '50%']].max().max()
    maxval1 = float(np.nanmax([max_typ, max_thres, max_ts, 1]))
    ax.set_ylim([0, maxval1])

    # Plot lines and regions:
    ax.fill_between(dftyp['epiweek'], 0, dftyp['corredor baixo'], color='green', alpha=0.5)
    ax.fill_between(dftyp['epiweek'], dftyp['corredor baixo'], dftyp['corredor mediano'],
                    color='yellow', alpha=0.5)
    ax.fill_between(dftyp['epiweek'], dftyp['corredor mediano'], dftyp['corredor alto'],
                    color='orange', alpha=0.5)

    # The curves stay best-effort (a failure must not abort the figure), but
    # only data-related errors are tolerated and they are reported.
    plot_errors = (KeyError, TypeError, ValueError)

    try:
        df.plot(ax=ax, x='epiweek', y='SRAG', color='k', lw=3, label='Casos notificados')
    except plot_errors as err:
        warnings.warn('plot_timeseries: reported cases not drawn (%s)' % err)

    # Estimated curve and its interval, from the first estimated week onwards.
    est_weeks = df.loc[df['Situation'] == 'estimated', 'epiweek'].dropna()
    if not est_weeks.empty:
        df_est = df[df['epiweek'] >= int(est_weeks.min())]
        try:
            df_est.plot(ax=ax, x='epiweek', y='50%', color='r', lw=3, label='Casos estimados')
            df_est.plot(ax=ax, x='epiweek', y='2.5%', color='r', lw=3, style='--',
                        label='Intervalo de confiança')
            df_est.plot(ax=ax, x='epiweek', y='97.5%', color='r', lw=3, style='--', label='')
        except plot_errors as err:
            warnings.warn('plot_timeseries: estimated curves not drawn (%s)' % err)

    # Upper bound only, from the first week flagged as 'unknown' onwards.
    unk_weeks = df.loc[df['Situation'] == 'unknown', 'epiweek'].dropna()
    if not unk_weeks.empty:
        df_unk = df[df['epiweek'] >= int(unk_weeks.min())]
        try:
            df_unk.plot(ax=ax, x='epiweek', y='97.5%', color='silver', lw=3, style='--',
                        label='Dados potencialmente\nincompletos')
        except plot_errors as err:
            warnings.warn('plot_timeseries: incomplete-data curve not drawn (%s)' % err)

    # Threshold lines; the legend label is the threshold column name itself.
    # float(<scalar>) replaces np.float(<Series>), which no longer exists in NumPy.
    for column, colour in (('limiar pré-epidêmico', 'yellow'),
                           ('intensidade alta', 'orange'),
                           ('intensidade muito alta', 'darkred')):
        ax.axhline(y=float(dfthres[column].iloc[0]), label=column, ls='--', lw=3, color=colour)

    # Draw a throw-away invisible line, grab ylim to set the range for the red
    # zone, then remove that line again. Artist.remove() is used because
    # Axes.lines can no longer be modified with `del` on current Matplotlib.
    dftyp.plot(ax=ax, x='epiweek', y='corredor alto', legend=False, alpha=0)
    miny, maxy = ax.get_ylim()
    ax.lines[-1].remove()
    ax.fill_between(dftyp['epiweek'], dftyp['corredor alto'], maxy, color='red', alpha=0.5)
    ax.set_ylim([miny, maxy])

    # Draw vertical line indicating user selected week:
    ax.axvline(x=week, color='silver', lw=8, alpha=0.5)

    # Use defined font properties for axis tick labels
    for label in ax.get_xticklabels():
        label.set_fontproperties(fontpropticks)
    for label in ax.get_yticklabels():
        label.set_fontproperties(fontpropticks)

    uf = df['Unidade da Federação'].unique()[0]
    ax.set_title(uf, fontproperties=fontproplbl)
    ax.set_xlabel('SE', fontproperties=fontproplbl)
    ax.set_ylabel('Incidência (por 100mil habitantes)', fontproperties=fontproplbl)
    xticks = np.arange(4, 53, 4)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticks)

    # Shrink current axis by 20% to make room for the legend on the right
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(prop=fontproplgd, loc='center left', bbox_to_anchor=(1, 0.5))

    # Activity level text for the selected week.
    week_rows = df.loc[df['epiweek'] == week]
    if week_rows.empty:
        raise IndexError('plot_timeseries: no row for epiweek %r' % (week,))
    week_row = week_rows.iloc[0]
    situation = week_row['Situation']

    if situation == 'estimated':
        # Estimated weeks show the probability of every level.
        probs = tuple('{:.1f}%'.format(100 * float(week_row[col]))
                      for col in ('L0', 'L1', 'L2', 'L3'))
        ax.text(1.01, 0.99,
                s=('Probabilidade do nível de atividade:\n'
                   'Baixa: %s\n'
                   'Epidêmica: %s\n'
                   'Alta: %s\n'
                   'Muito alta: %s') % probs,
                fontproperties=fontproplgd, ha='left', va='top', transform=ax.transAxes)
        txt = '*Dados estimados sujeitos a alterações futuras.'
    else:
        # Otherwise show only the level with the largest value.
        levels = week_row[list(level_dict.keys())].astype(float)
        activity_level = levels.sort_values(ascending=False).index[0]
        ax.text(1.01, 0.99,
                s='Nível de atividade:\n%s\n' % level_dict[activity_level],
                fontproperties=fontproplgd, ha='left', va='top', transform=ax.transAxes)
        if situation == 'stable':
            txt = '*Dados sujeitos a alterações futuras.'
        else:
            txt = '*Dados potencialmente incompletos,\nsujeitos a alterações futuras.'

    ax.text(.99, 0.99, s=txt, fontproperties=fontpropticks, ha='right', va='top',
            transform=ax.transAxes)
    return
In [10]:
# Example: Rio Grande do Sul, season 2013, highlighting week 32.
# plot_timeseries picks up `week` from the notebook namespace.
season = 2013
uf = 'Rio Grande do Sul'
week = 32
df_tmp = df.loc[(df['epiyear'] == season) & (df['Unidade da Federação'] == uf)]
dftyp_tmp = dftypical.loc[dftypical['Unidade da Federação'] == uf]
dfthres_tmp = dfthresholds.loc[dfthresholds['Unidade da Federação'] == uf]
plot_timeseries(df=df_tmp, dftyp=dftyp_tmp, dfthres=dfthres_tmp)
In [11]:
# Example: Rio Grande do Sul, season 2016, highlighting week 41.
# plot_timeseries picks up `week` from the notebook namespace.
season = 2016
uf = 'Rio Grande do Sul'
week = 41
df_tmp = df.loc[(df['epiyear'] == season) & (df['Unidade da Federação'] == uf)]
dftyp_tmp = dftypical.loc[dftypical['Unidade da Federação'] == uf]
dfthres_tmp = dfthresholds.loc[dfthresholds['Unidade da Federação'] == uf]
plot_timeseries(df=df_tmp, dfthres=dfthres_tmp, dftyp=dftyp_tmp)
In [12]:
# Example: Regional Sul (regional aggregate), season 2016, highlighting week 41.
# plot_timeseries picks up `week` from the notebook namespace.
season = 2016
uf = 'Regional Sul'
week = 41
df_tmp = df.loc[(df['epiyear'] == season) & (df['Unidade da Federação'] == uf)]
dftyp_tmp = dftypical.loc[dftypical['Unidade da Federação'] == uf]
dfthres_tmp = dfthresholds.loc[dfthresholds['Unidade da Federação'] == uf]
plot_timeseries(df=df_tmp, dftyp=dftyp_tmp, dfthres=dfthres_tmp)
In [13]:
# Example: Rondônia, season 2016, highlighting week 32.
# plot_timeseries picks up `week` from the notebook namespace.
season = 2016
uf = 'Rondônia'
week = 32
df_tmp = df.loc[(df['epiyear'] == season) & (df['Unidade da Federação'] == uf)]
dftyp_tmp = dftypical.loc[dftypical['Unidade da Federação'] == uf]
dfthres_tmp = dfthresholds.loc[dfthresholds['Unidade da Federação'] == uf]
plot_timeseries(df=df_tmp, dftyp=dftyp_tmp, dfthres=dfthres_tmp)
Create column with the most probable activity level per week:
In [14]:
# For every row, keep the name of the level column (L0..L3) holding the largest value.
df['Level'] = df.loc[:, list(level_dict)].idxmax(axis='columns')
In [15]:
# Level values and most probable level for one location, season and week.
season = 2016
uf = 'Rio Grande do Sul'
week = 41
df_tmp_week_level = df.loc[
    (df['epiweek'] == week) & (df['epiyear'] == season) & (df['Unidade da Federação'] == uf),
    list(level_dict) + ['Level'],
]
df_tmp_week_level
Out[15]:
In [16]:
# Report the most probable activity level of the selected week and its probability.
activity_level = df_tmp_week_level['Level'].values[0]
# Take the scalar explicitly: calling float() on a one-element Series is
# deprecated in recent pandas releases.
activity_level_prob = float(df_tmp_week_level[activity_level].iloc[0])
print(level_dict[activity_level], '{:.1f}%'.format(100*activity_level_prob))
In [17]:
# Weekly incidence by age bracket and sex; the 'sexo' column is renamed to 'Sexo'.
df_age_dist = pd.read_csv(
    '../data/clean_data_epiweek-weekly-incidence_w_situation.csv',
    low_memory=False,
    encoding='utf-8',
).rename(columns={'sexo': 'Sexo'})
In [18]:
df_age_dist.columns
Out[18]:
In [19]:
df_age_dist.head()
Out[19]:
In [20]:
def plot_agedist(df):
    '''
    Draw a grouped bar chart of incidence per age bracket, one bar per sex
    category ('Homens', 'Mulheres', 'Total').

    `df` must hold one row per 'Sexo' value ('M', 'F', 'Total') together with
    the age bracket columns, 'Situation' and 'Unidade da Federação'. The
    footnote is chosen from the 'Situation' of the first row, and the title
    from the first 'Unidade da Federação' value. Returns None.
    '''
    def _oswald(size):
        # Single-argument constructor followed by set_size, as in the other plots.
        props = fm.FontProperties('Oswald')
        props.set_size(size)
        return props

    legend_font = _oswald(28)
    label_font = _oswald(42)
    tick_font = _oswald(24)

    fig, ax = plt.subplots(1, 1, figsize=[20, 20])

    # Footnote wording depends on whether the data are flagged as stable.
    if df['Situation'].values[0] == 'stable':
        footnote = '*Dados sujeitos a pequenas alterações futuras'
    else:
        footnote = '*Dados potencialmente incompletos,\nsujeitos a alterações futuras'

    brackets = ['0-4 anos', '5-9 anos', '10-19 anos', '20-29 anos', '30-39 anos',
                '40-49 anos', '50-59 anos', '60+ anos']

    # Age brackets become the rows (x axis), sex categories the columns (bars).
    by_sex = df[brackets + ['Sexo']].set_index('Sexo').transpose()
    by_sex = by_sex.rename(columns={'F': 'Mulheres', 'M': 'Homens'})
    ax = by_sex[['Homens', 'Mulheres', 'Total']].plot.bar(ax=ax)

    plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontproperties=tick_font)
    plt.setp(ax.get_yticklabels(), fontproperties=tick_font)
    ax.legend(prop=legend_font, loc='upper right', bbox_to_anchor=(1, .9))

    location = df['Unidade da Federação'].unique()[0]
    ax.set_title(location, fontproperties=label_font)
    ax.set_xlabel('Faixa etária', fontproperties=label_font)
    ax.set_ylabel('Incidência (por 100mil habitantes)', fontproperties=label_font)
    ax.text(.99, 0.99, s=footnote, fontproperties=tick_font, ha='right', va='top',
            transform=ax.transAxes)
    return
In [21]:
# Age distribution for Rio Grande do Sul, season 2016, week 20.
season = 2016
uf = 'Rio Grande do Sul'
week = 20
df_tmp = df_age_dist.loc[
    (df_age_dist['epiweek'] == week)
    & (df_age_dist['epiyear'] == season)
    & (df_age_dist['Unidade da Federação'] == uf)
]
plot_agedist(df_tmp)
In [22]:
# Age distribution for Rio Grande do Sul, season 2016, week 41.
season = 2016
uf = 'Rio Grande do Sul'
week = 41
df_tmp = df_age_dist.loc[
    (df_age_dist['epiweek'] == week)
    & (df_age_dist['epiyear'] == season)
    & (df_age_dist['Unidade da Federação'] == uf)
]
plot_agedist(df_tmp)
In [23]:
# Weekly table: one row per location for the chosen season and week.
season = 2016
week = 41
table_cols = ['Unidade da Federação', 'SRAG', '50%', '2.5%', '97.5%', 'Situation']
df_table = df.loc[(df['epiweek'] == week) & (df['epiyear'] == season), table_cols]
def report_inc(x, low, high, situation):
    """
    Format an incidence value for the report table.

    'estimated' values are shown with their interval as 'x [low - high]',
    'stable' values as the plain number, and any other situation as the
    number prefixed with '*'. All numbers use two decimals.
    """
    if situation == 'estimated':
        return '%.2f [%.2f - %.2f]' % (x, low, high)
    if situation == 'stable':
        return '%.2f' % x
    return '*%.2f' % x
# Formatted incidence text, built row by row from the estimate, its interval and the situation.
df_table['Incidência (por 100 mil hab.)'] = df_table[['50%', '2.5%', '97.5%', 'Situation']].apply(
    lambda row: report_inc(row['50%'], row['2.5%'], row['97.5%'], row['Situation']),
    axis='columns',
)

# Situation code -> text shown in the table; a code missing from the mapping raises KeyError.
situation_dict = {
    'stable': 'Dado estável. Sujeito a pequenas alterações',
    'estimated': 'Estimado. Sujeito a alterações',
    'unknown': 'Dados incompletos. Sujeito a grandes alterações',
}
df_table['Situação'] = df_table['Situation'].map(situation_dict.__getitem__)
df_table.loc[:, ['Unidade da Federação', 'Situação', 'Incidência (por 100 mil hab.)']]
Out[23]:
From the data frame df, which holds the detailed weekly information, we will build an aggregated data frame df_by_season that stores the total counts per season and per location (state, region and country), as well as a season-level indicator.
In [24]:
# Working copy of the weekly frame in which 'Situation' is collapsed to one
# value per season: 'incomplete' when any row of that season is still
# 'unknown' or 'estimated', 'stable' otherwise.
season_cols = ['UF', 'Unidade da Federação', 'epiyear', 'SRAG',
               'Tipo', 'Situation',
               'Level']
df_tmp = df[season_cols].copy()
season_list = df_tmp.epiyear.unique()
for season in season_list:
    situation = list(df_tmp[df_tmp.epiyear==season].Situation.unique())
    # The weekly marker is spelled 'estimated'; the former test for
    # 'estimate' never matched, so estimated-only seasons were marked stable.
    if ('unknown' in situation or 'estimated' in situation):
        df_tmp.loc[df_tmp.epiyear == season, 'Situation'] = 'incomplete'
    else:
        df_tmp.loc[df_tmp.epiyear == season, 'Situation'] = 'stable'
In [25]:
df_tmp.head()
Out[25]:
In [26]:
df_tmp.tail()
Out[26]:
In [27]:
# Sum SRAG and every age bracket column over all rows sharing the same
# location, season and sex; as_index=False keeps the group keys as columns.
df_by_season = df_age_dist[['UF', 'Unidade da Federação', 'epiyear', 'Sexo', 'SRAG',
'0-4 anos', '5-9 anos', '10-19 anos', '20-29 anos', '30-39 anos',
'40-49 anos', '50-59 anos', '60+ anos']].groupby(['UF', 'Unidade da Federação', 'epiyear', 'Sexo'],
as_index=False).sum()
In [28]:
def get_season_situation(x):
    """
    Return the season-level situation stored in the notebook-level `df_tmp`
    for the location (`x.UF`) and season (`x.epiyear`) of row `x`.

    The first distinct 'Situation' value of the matching rows is returned;
    an IndexError is raised when no row matches.
    """
    matching = df_tmp[(df_tmp.epiyear == x.epiyear) & (df_tmp.UF == x.UF)]
    return matching.Situation.unique()[0]
In [29]:
# Look up the season-level situation for every (location, season, sex) row.
df_by_season['Situation'] = df_by_season.apply(get_season_situation, axis='columns')
Data frame df_by_season has the total number of reported cases in column SRAG, the total per age bracket, and the situation of the data ("stable" or "incomplete"). The situation column should be interpreted in the same way as the "stable" and "unknown" markers from the detailed panel, and the same warnings should be raised when presenting the data.
In [30]:
df_by_season.head()
Out[30]:
In [31]:
df_by_season.tail()
Out[31]:
First we will create a temporary data frame to store the number of times each level was reported in each state and for each season. This can be done by cross-tabulating the information in df_tmp.
In [32]:
# Count, for every location and season, how many rows fall in each activity level.
df_by_season_level = pd.crosstab(
    index=[df_tmp.UF, df_tmp['Unidade da Federação'], df_tmp.epiyear],
    columns=df_tmp.Level,
).reset_index()
In [33]:
# Clear the name of the columns axis (presumably 'Level', inherited from the
# crosstab columns argument) so the frame displays like a plain table.
df_by_season_level.columns.name = None
df_by_season_level.head()
Out[33]:
Using the information stored in df_by_season_level, we can apply the defined criteria to obtain each season's level:
In [34]:
def get_season_level(x):
    '''
    Map weekly activity counts to a season colour code.

    `x` holds the number of weeks spent at each level under the keys
    'L1', 'L2' and 'L3'. With h = L2 + L3:
      h > 4   -> 'red'
      h > 1   -> 'orange'
      L1 > 1  -> 'yellow'
      else    -> 'green'
    '''
    high_weeks = x['L2'] + x['L3']
    if high_weeks > 4:
        return 'red'
    if high_weeks > 1:
        return 'orange'
    if x['L1'] > 1:
        return 'yellow'
    return 'green'
We see that row 255 corresponds to season 2016 for Regional Sul, which has so far recorded 14 weeks at L0, 13 at L1, 6 at L2 and 8 at L3. Since there are more than 4 weeks with activity L2 or L3, the seasonal level should be red, as defined in our criteria.
In [35]:
# Label-based lookup of row 255; .loc replaces the .ix indexer, which no longer exists in pandas.
df_by_season_level.loc[255]
Out[35]:
In [36]:
# Season level for row 255; .loc replaces the .ix indexer, which no longer exists in pandas.
get_season_level(df_by_season_level[list(level_dict.keys())].loc[255])
Out[36]:
Finally, column "Level" has the corresponding color code appropriate for season's activity level, based on pre-defined criteria:
In [37]:
# Colour code of every (location, season) row, derived from its per-level week counts.
df_by_season_level['Level'] = df_by_season_level.loc[:, list(level_dict)].apply(
    get_season_level, axis='columns')
In [38]:
df_by_season_level.head()
Out[38]:
In [39]:
df_by_season_level.tail()
Out[39]:
In [40]:
df_by_season_level[df_by_season_level.UF == '43']
Out[40]:
In [41]:
# Bring the season colour code into the aggregated frame. Left join: rows
# without a matching (UF, epiyear) keep a missing 'Level'.
# NOTE: re-running this cell merges again on top of the already merged frame.
df_by_season = pd.merge(
    df_by_season,
    df_by_season_level[['UF', 'epiyear', 'Level']],
    how='left',
    on=['UF', 'epiyear'],
)
In [42]:
df_by_season.head()
Out[42]:
In [43]:
df_by_season.tail()
Out[43]:
In [47]:
# Season-wide age distribution for Pará, 2016.
# NOTE: this rebinds `df_tmp`, which get_season_situation reads; re-running
# the earlier situation cell after this one will use the wrong frame.
season = 2016
uf = 'Pará'
df_tmp = df_by_season.loc[
    (df_by_season['epiyear'] == season) & (df_by_season['Unidade da Federação'] == uf)
]
plot_agedist(df_tmp)
In [52]:
# Seasonal table: one 'Total' row per location for the chosen season.
season = 2016
table_cols = ['Unidade da Federação', 'SRAG', 'Situation']
df_table = df_by_season.loc[
    (df_by_season['Sexo'] == 'Total') & (df_by_season['epiyear'] == season),
    table_cols,
]
def report_inc(x, situation, low=None, high=None):
    """
    Format a seasonal incidence value for the report table.

    Parameters
    ----------
    x : float
        Value to report.
    situation : str
        'stable' gives the plain number, 'estimated' gives
        'x [low - high]', anything else gives the number prefixed with '*'.
    low, high : float, optional
        Interval bounds; only used (and then required) for 'estimated'.

    Returns
    -------
    str
        The formatted value, with two decimals.

    The second parameter used to be misspelled, so the body silently read a
    notebook-level `situation` variable instead of the argument.
    """
    if situation == 'stable':
        y = '%.2f' % x
    elif situation == 'estimated':
        y = '%.2f [%.2f - %.2f]' % (x, low, high)
    else:
        y = '*%.2f' % x
    return y
# Formatted incidence text, built from the seasonal total and its situation.
df_table['Incidência (por 100 mil hab.)'] = df_table[['SRAG', 'Situation']].apply(
    lambda row: report_inc(row['SRAG'], row['Situation']),
    axis='columns',
)

# Season-level situation code -> text shown in the table; a code missing from the mapping raises KeyError.
situation_dict = {
    'stable': 'Dado estável. Sujeito a pequenas alterações',
    'incomplete': 'Dados incompletos. Sujeito a grandes alterações',
}
df_table['Situação'] = df_table['Situation'].map(situation_dict.__getitem__)
df_table.loc[:, ['Unidade da Federação', 'Situação', 'Incidência (por 100 mil hab.)']]
Out[52]:
In [ ]: