Dengue Alerta


In [1]:
from matplotlib import pyplot as plt
from eralchemy import render_er
from IPython.display import display 
from PIL import Image
from datetime import datetime, date, timedelta

import os
import matplotlib as mpl
import numpy as np
import pandas as pd
import psycopg2
import pytz
import sys
import sqlalchemy as sqla

In [2]:
dir_root = os.path.dirname(os.getcwd())
dir_app = os.path.join(
    dir_root, 'AlertaDengue'
)

sys.path.insert(0, dir_app)
from AlertaDengue import settings
from dados import dbdata

Database


In [3]:
# connection
db_dsn = 'postgresql://{}:{}@{}/{}'.format(
    settings.PSQL_USER,
    settings.PSQL_PASSWORD,
    settings.PSQL_HOST,
    settings.PSQL_DB)
engine = sqla.create_engine(db_dsn)

# connection info_dengue
db_dsn2 = 'postgresql://{}:{}@{}/{}'.format(
    settings.PSQL_USER,
    settings.PSQL_PASSWORD,
    settings.PSQL_HOST,
    'infodengue')
engine_info_dengue = sqla.create_engine(db_dsn2)

In [4]:
# scheme
pd.read_sql('SELECT nspname FROM pg_catalog.pg_namespace;', engine)

# Dengue_global
# Municipio


Out[4]:
nspname
0 pg_toast
1 pg_temp_1
2 pg_toast_temp_1
3 pg_catalog
4 information_schema
5 Dengue_global
6 Municipio
7 public

In [5]:
# Schema: Dengue_global

sql = '''
SELECT table_name
FROM information_schema.tables
WHERE table_schema='Dengue_global'
ORDER BY table_schema,table_name;
'''

display(pd.read_sql(sql, engine))

render_er(db_dsn, '/tmp/img.png', schema='Dengue_global')
Image.open('/tmp/img.png')


table_name
0 CID10
1 estado
2 Municipio
3 regional_saude
Out[5]:

In [6]:
sql = 'SELECT * from "Dengue_global"."Municipio"'
pd.read_sql(sql, engine).head()


Out[6]:
geocodigo nome geojson populacao uf
0 2204303 Fronteiras {"id": null, "geometry": {"coordinates": [[[-4... 11411 Piauí
1 2501807 Bayeux {"id": null, "geometry": {"coordinates": [[[-3... 95677 Paraíba
2 2905008 Caculé {"id": null, "geometry": {"coordinates": [[[-4... 23392 Bahia
3 3105608 Barbacena {"id": null, "geometry": {"coordinates": [[[-4... 133972 Minas Gerais
4 1100015 Alta Floresta D'Oeste {"id": null, "geometry": {"coordinates": [[[-6... 25652 Rondônia

In [7]:
# Schema: Municipio

sql = '''
SELECT table_name
FROM information_schema.tables
WHERE table_schema='Municipio'
ORDER BY table_schema,table_name;
'''

display(pd.read_sql(sql, engine))

render_er(db_dsn, '/tmp/img.png', schema='Municipio')
Image.open('/tmp/img.png')


table_name
0 alerta_mrj
1 Bairro
2 Clima_cemaden
3 Clima_Satelite
4 Clima_wu
5 Estacao_cemaden
6 Estacao_wu
7 Historico_alerta
8 Localidade
9 Notificacao
10 Ovitrampa
11 Tweet
Out[7]:

In [8]:
# Schema: public

sql = '''
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
ORDER BY table_schema,table_name;
'''

display(pd.read_sql(sql, engine_info_dengue))

render_er(db_dsn2, '/tmp/img.png', schema='public')
Image.open('/tmp/img.png')


table_name
0 auth_group
1 auth_group_permissions
2 auth_permission
3 auth_user
4 auth_user_groups
5 auth_user_user_permissions
6 dbf_dbf
7 dbf_dbfchunkedupload
8 Dengue_2010
9 Dengue_2011
10 Dengue_2012
11 Dengue_2013
12 DengueConfirmados_2013
13 django_admin_log
14 django_content_type
15 django_migrations
16 django_session
17 geography_columns
18 geometry_columns
19 raster_columns
20 raster_overviews
21 spatial_ref_sys
/home/xmn/anaconda3/envs/AlertaDengue/lib/python3.5/site-packages/sqlalchemy/dialects/postgresql/base.py:2439: SAWarning: Did not recognize type 'geometry' of column 'geom'
  (attype, name))
Out[8]:

In [9]:
sql = 'SELECT * from "Municipio"."Historico_alerta"'
pd.read_sql(sql, engine).head()


Out[9]:
data_iniSE SE casos_est casos_est_min casos_est_max casos municipio_geocodigo p_rt1 p_inc100k Localidade_id nivel id versao_modelo municipio_nome
0 2016-11-06 201645 0.0 0 0 0 3302908 0.000000 0.000 0 1 3302908020164517162 2016-12-27 None
1 2011-05-08 201119 392.0 392 392 392 3300100 0.997485 211.961 0 4 3300100020111917162 2016-12-27 Angra dos Reis
2 2016-09-11 201637 0.0 0 0 0 4103057 0.000000 0.000 0 1 4103057020163717182 2017-01-16 None
3 2016-09-11 201637 0.0 0 0 0 4105904 0.000000 0.000 0 1 4105904020163717182 2017-01-16 None
4 2016-09-18 201638 0.0 0 0 0 4128633 0.500000 0.000 0 1 4128633020163817182 2017-01-16 None

Essay #1

  • Municípios participantes
  • Total de casos notificado e estimados na semana
  • Previsão de casos para as próximas semanas
  • Variação em relação à semana anterior

In [10]:
ufs = ['Rio de Janeiro', 'Paraná', 'Espírito Santo']
ufs_s = ["'%s'" % v for v in ufs]

In [11]:
geo_id_curitiba = 4106902  # Curitiba
geo_id_rio_de_janeiro = 3304557  # Rio de Janeiro

In [12]:
# Municípios participantes

sql = '''
SELECT uf, COALESCE(COUNT(municipio_geocodigo), 0) AS count
FROM (
    SELECT DISTINCT municipio_geocodigo 
    FROM "Municipio"."Historico_alerta") AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
  ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf IN (%s)
GROUP BY uf
''' % ','.join(ufs_s)

display(pd.read_sql(sql, engine))


uf count
0 Espírito Santo 78
1 Paraná 399
2 Rio de Janeiro 92

In [13]:
# Total de casos notificado e estimados na semana
for w in ['36', '37']:

    se = str(datetime.now().isocalendar()[0])
    # se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
    se += w

    sql = '''
    SELECT 
        uf, 
        COALESCE(SUM(casos), 0) AS casos, 
        COALESCE(SUM(casos_est), 0) AS casos_est
    FROM "Municipio"."Historico_alerta" AS alerta
    INNER JOIN "Dengue_global"."Municipio" AS municipio
      ON alerta.municipio_geocodigo = municipio.geocodigo
    WHERE uf IN (%s) AND "SE" = %s
    GROUP BY uf
    ''' % (','.join(ufs_s), se)

    display(pd.read_sql(sql, engine))


uf casos casos_est
uf casos casos_est

In [14]:
# Previsão de casos para as próximas semanas
# TODO: atualmente está igual à semana atual
for w in ['36', '37']:
    for uf in ufs:
        se = str(datetime.now().isocalendar()[0])
        # se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
        se += w

        sql = '''
        SELECT SUM(casos) AS casos, SUM(casos_est) AS casos_est
        FROM "Municipio"."Historico_alerta" AS alerta
        INNER JOIN "Dengue_global"."Municipio" AS municipio
          ON alerta.municipio_geocodigo = municipio.geocodigo
        WHERE uf='%s' AND "SE" = %s
        ''' % (uf, se)
        
        print('\n', uf, se)
        display(pd.read_sql(sql, engine))


 Rio de Janeiro 201736
casos casos_est
0 None None
 Paraná 201736
casos casos_est
0 None None
 Espírito Santo 201736
casos casos_est
0 None None
 Rio de Janeiro 201737
casos casos_est
0 None None
 Paraná 201737
casos casos_est
0 None None
 Espírito Santo 201737
casos casos_est
0 None None

In [15]:
# Variação em relação à semana anterior

se1 = str(datetime.now().isocalendar()[0])
# se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
se1 += '36'

se2 = str(datetime.now().isocalendar()[0])
# se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
se2 += '37'

sql = '''
SELECT 
    uf,
    (SUM(alerta.casos)-SUM(alerta_passado.casos)) AS casos, 
    (SUM(alerta.casos_est)-SUM(alerta_passado.casos_est)) AS casos_est
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Municipio"."Historico_alerta" AS alerta_passado
  ON (
    alerta.municipio_geocodigo = alerta_passado.municipio_geocodigo
    AND alerta."SE"=%s
    AND alerta_passado."SE"=%s)
INNER JOIN "Dengue_global"."Municipio" AS municipio
  ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf IN (%s)
GROUP BY uf
''' % (se2, se1, ','.join(ufs_s))

display(pd.read_sql(sql, engine))


uf casos casos_est

Dashboard Dengue


In [16]:
# Distribuição por tipo de doença
uf = 'Paraná'

sql = '''
SELECT *
FROM "Municipio"."Notificacao" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
  ON alerta.municipio_geocodigo = municipio.geocodigo
INNER JOIN "Dengue_global"."CID10" AS cid10
  ON alerta.cid10_codigo = cid10.codigo
WHERE uf='{}' AND se_notif=36 AND ano_notif=2016
LIMIT 2
'''.format(uf)

display(pd.read_sql(sql, engine))


id dt_notific se_notif ano_notif dt_sin_pri se_sin_pri dt_digita bairro_nome bairro_bairro_id municipio_geocodigo ... dt_nasc cs_sexo nu_idade_n geocodigo nome geojson populacao uf nome codigo
0 8716731 2016-09-10 36 2016 2016-09-05 36 2016-09-15 AQUILES STENGHEL UBS 2577852 4113700 ... None None None 4113700 Londrina {"id": null, "geometry": {"coordinates": [[[-5... 543003 Paraná Dengue [dengue clássico] A90
1 8716805 2016-09-09 36 2016 2016-09-08 36 2016-09-09 SANTA MONICA 5 4126504 ... None None None 4126504 Sertanópolis {"id": null, "geometry": {"coordinates": [[[-5... 16315 Paraná Dengue [dengue clássico] A90

2 rows × 22 columns


In [17]:
# Distribuição por gênero
uf = 'Paraná'

sql = '''
SELECT *
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
  ON alerta.municipio_geocodigo = municipio.geocodigo
LIMIT 2
'''

display(pd.read_sql(sql, engine))


data_iniSE SE casos_est casos_est_min casos_est_max casos municipio_geocodigo p_rt1 p_inc100k Localidade_id nivel id versao_modelo municipio_nome geocodigo nome geojson populacao uf
0 2016-11-06 201645 0.0 0 0 0 3302908 0.000000 0.000 0 1 3302908020164517162 2016-12-27 None 3302908 Miguel Pereira {"id": null, "geometry": {"coordinates": [[[-4... 24829 Rio de Janeiro
1 2011-05-08 201119 392.0 392 392 392 3300100 0.997485 211.961 0 4 3300100020111917162 2016-12-27 Angra dos Reis 3300100 Angra dos Reis {"id": null, "geometry": {"coordinates": [[[[-... 184940 Rio de Janeiro

In [18]:
# Select the last alert level for each city in a specific state

uf = 'Paraná'

sql = '''
SELECT 
    hist_alert.id,
    hist_alert.municipio_geocodigo, 
    municipio.nome,
    hist_alert."data_iniSE", 
    (hist_alert.nivel-1) AS level_alert
FROM 
    "Municipio"."Historico_alerta" AS hist_alert
    INNER JOIN (
        SELECT geocodigo, MAX("data_iniSE") AS "data_iniSE"
        FROM 
            "Municipio"."Historico_alerta" AS alerta
            INNER JOIN "Dengue_global"."Municipio" AS municipio
                ON alerta.municipio_geocodigo = municipio.geocodigo
        WHERE uf='{}'
        GROUP BY geocodigo
    ) AS recent_alert ON (
        recent_alert.geocodigo=hist_alert.municipio_geocodigo 
        AND recent_alert."data_iniSE"=hist_alert."data_iniSE"
    ) INNER JOIN "Dengue_global"."Municipio" AS municipio ON (
        hist_alert.municipio_geocodigo = municipio.geocodigo
    )
'''.format(uf)

df = pd.read_sql(sql, engine, 'id')

mun_dict = dict(df[['municipio_geocodigo', 'nome']].values)
str(mun_dict)[:80]

alerts = dict(df[['municipio_geocodigo', 'level_alert']].values)
str(alerts)[:80]


Out[18]:
'{4115457: 0, 4121604: 0, 4100103: 0, 4125704: 0, 4104204: 0, 4108304: 0, 4112405'

In [19]:
len(alerts.keys())


Out[19]:
399

In [20]:
# Select the 12 last alert level for each city in a specific state

geo_ids = list(alerts.keys())

sql_template = '''(
SELECT municipio_geocodigo, "data_iniSE", casos_est, id
FROM "Municipio"."Historico_alerta"
WHERE municipio_geocodigo={}
ORDER BY "data_iniSE" DESC
LIMIT 12)'''

sql = ' UNION '.join([
    sql_template.format(gid) for gid in geo_ids
]) + ' ORDER BY municipio_geocodigo, "data_iniSE"'

df_case_series = pd.read_sql(sql, engine, 'id')
case_series_tail_12 = {
    k: v.casos_est.values 
    for k, v in df_case_series.groupby(by='municipio_geocodigo')
}

case_series_tail_12[list(case_series_tail_12.keys())[0]]


Out[20]:
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

Distribution cases by date


In [35]:
# geo_id = 4106902  # Curitiba
uf = 'Paraná'

sql = '''
SELECT 
    dt_notific, 
    count(dt_notific) AS Casos
FROM "Municipio"."Notificacao" AS notif
INNER JOIN "Dengue_global"."Municipio" AS municipio
    ON notif.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY "dt_notific"
ORDER BY "dt_notific"
'''.format(uf)

df_alert_period = pd.read_sql(sql, engine, index_col='dt_notific')
df_alert_period.head()

display(df_alert_period.head())

df_alert_period.plot()
plt.show()


casos
dt_notific
2008-01-02 4
2008-01-03 3
2008-01-04 3
2008-01-07 6
2008-01-08 1

Distribution cases by date


In [23]:
uf = 'Paraná'

sql = '''
SELECT 
    "data_iniSE", 
    SUM(casos) AS Casos
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
    ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY "data_iniSE"
ORDER BY "data_iniSE"
'''.format(uf)

df_alert_period = pd.read_sql(sql, engine, index_col='data_iniSE')
df_alert_period.head()


Out[23]:
casos
data_iniSE
2010-01-03 294
2010-01-10 368
2010-01-17 427
2010-01-24 627
2010-01-31 809

Distribution cases by gender


In [24]:
uf = 'Paraná'

sql = '''
SELECT 
    (CASE COALESCE(cs_sexo, 'Não informado') 
     WHEN 'M' THEN 'Homem'
     WHEN 'F' THEN 'Mulher'
     ELSE 'Não informado'
     END
    ) AS gender, 
    COUNT(id) AS casos
FROM
    "Municipio"."Notificacao" AS notif
    INNER JOIN "Dengue_global"."Municipio" AS municipio
      ON notif.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY cs_sexo;
'''.format(uf)

df_sex_dist = pd.read_sql(sql, engine, 'gender')
display(df_sex_dist)

df_sex_dist.plot(kind='bar')
plt.xticks(rotation=45)
plt.show()


casos
gender
Não informado 732993
Homem 21
Mulher 23

Distribution cases by disease


In [25]:
sql = '''
SELECT * FROM "Dengue_global"."CID10"
WHERE codigo IN ({})
'''.format(','.join(["'{}'".format(k) for k in dbdata.CID10.values()]))

df_cid10 = pd.read_sql(sql, engine, 'codigo')
df_cid10


Out[25]:
nome
codigo
A90 Dengue [dengue clássico]

In [40]:
uf = 'Paraná'

sql = '''
SELECT 
    COALESCE(cid10.nome, 'Não informado') AS nome, 
    count(id) AS casos
FROM
    "Municipio"."Notificacao" AS notif
    INNER JOIN "Dengue_global"."Municipio" AS municipio
      ON notif.municipio_geocodigo = municipio.geocodigo
    LEFT JOIN "Dengue_global"."CID10" AS cid10
      ON notif.cid10_codigo=cid10.codigo
WHERE uf='{}' AND (cid10.codigo IN ({}) OR notif.cid10_codigo IS NULL)
GROUP BY cid10.nome;
'''.format(uf, ','.join(["'{}'".format(k) for k in dbdata.CID10.values()]))

df_disease_dist = pd.read_sql(sql, engine)

In [41]:
dfs = [df_disease_dist]

for k in df_cid10.nome.values:
    if not k in df_disease_dist.nome.values:
        dfs.append(pd.DataFrame({'nome': [k], 'casos': [0]}))

In [42]:
df_disease_dist = pd.concat(dfs)

In [43]:
display(df_disease_dist)

df_disease_dist.set_index('nome', drop=True).plot(kind='bar')
plt.xticks(rotation=45)
plt.show()


nome casos
0 Não informado 416262
1 Dengue [dengue clássico] 316775

Distribution cases by age


In [30]:
uf = 'Paraná'

sql = '''
SELECT age, count(age) AS cases
FROM (
    SELECT 
        CASE 
        WHEN nu_idade_n BETWEEN 4000 AND 4004 THEN '0-4'
        WHEN nu_idade_n BETWEEN 4005 AND 4009 THEN '5-9'
        WHEN nu_idade_n BETWEEN 4010 AND 4019 THEN '10-19'
        WHEN nu_idade_n BETWEEN 4020 AND 4029 THEN '20-29'
        WHEN nu_idade_n BETWEEN 4030 AND 4039 THEN '30-39'
        WHEN nu_idade_n BETWEEN 4040 AND 4049 THEN '40-49'
        WHEN nu_idade_n BETWEEN 4050 AND 4059 THEN '50-59'
        WHEN nu_idade_n >=4060 THEN '60+'
        ELSE 'Não informado'
        END AS age
    FROM
        "Municipio"."Notificacao" AS notif
        INNER JOIN "Dengue_global"."Municipio" AS municipio
          ON notif.municipio_geocodigo = municipio.geocodigo
    WHERE uf='{}'
) AS TB
GROUP BY age
ORDER BY age
'''.format(uf)

df_age_dist = pd.read_sql(sql, engine, 'age')


display(df_age_dist)
df_age_dist.plot(kind='bar')
plt.xticks(rotation=45)

plt.show()


cases
age
10-19 5
20-29 9
30-39 13
40-49 6
50-59 6
5-9 2
60+ 2
Não informado 732994