In [1]:
from matplotlib import pyplot as plt
from eralchemy import render_er
from IPython.display import display
from PIL import Image
from datetime import datetime, date, timedelta
import os
import matplotlib as mpl
import numpy as np
import pandas as pd
import psycopg2
import pytz
import sys
import sqlalchemy as sqla
In [2]:
dir_root = os.path.dirname(os.getcwd())
dir_app = os.path.join(
dir_root, 'AlertaDengue'
)
sys.path.insert(0, dir_app)
from AlertaDengue import settings
from dados import dbdata
In [3]:
# connection
db_dsn = 'postgresql://{}:{}@{}/{}'.format(
settings.PSQL_USER,
settings.PSQL_PASSWORD,
settings.PSQL_HOST,
settings.PSQL_DB)
engine = sqla.create_engine(db_dsn)
# connection info_dengue
db_dsn2 = 'postgresql://{}:{}@{}/{}'.format(
settings.PSQL_USER,
settings.PSQL_PASSWORD,
settings.PSQL_HOST,
'infodengue')
engine_info_dengue = sqla.create_engine(db_dsn2)
In [4]:
# scheme
pd.read_sql('SELECT nspname FROM pg_catalog.pg_namespace;', engine)
# Dengue_global
# Municipio
Out[4]:
In [5]:
# Schema: Dengue_global
sql = '''
SELECT table_name
FROM information_schema.tables
WHERE table_schema='Dengue_global'
ORDER BY table_schema,table_name;
'''
display(pd.read_sql(sql, engine))
render_er(db_dsn, '/tmp/img.png', schema='Dengue_global')
Image.open('/tmp/img.png')
Out[5]:
In [6]:
sql = 'SELECT * from "Dengue_global"."Municipio"'
pd.read_sql(sql, engine).head()
Out[6]:
In [7]:
# Schema: Municipio
sql = '''
SELECT table_name
FROM information_schema.tables
WHERE table_schema='Municipio'
ORDER BY table_schema,table_name;
'''
display(pd.read_sql(sql, engine))
render_er(db_dsn, '/tmp/img.png', schema='Municipio')
Image.open('/tmp/img.png')
Out[7]:
In [8]:
# Schema: public
sql = '''
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
ORDER BY table_schema,table_name;
'''
display(pd.read_sql(sql, engine_info_dengue))
render_er(db_dsn2, '/tmp/img.png', schema='public')
Image.open('/tmp/img.png')
Out[8]:
In [9]:
sql = 'SELECT * from "Municipio"."Historico_alerta"'
pd.read_sql(sql, engine).head()
Out[9]:
In [10]:
ufs = ['Rio de Janeiro', 'Paraná', 'Espírito Santo']
ufs_s = ["'%s'" % v for v in ufs]
In [11]:
geo_id_curitiba = 4106902 # Curitiba
geo_id_rio_de_janeiro = 3304557 # Rio de Janeiro
In [12]:
# Municípios participantes
sql = '''
SELECT uf, COALESCE(COUNT(municipio_geocodigo), 0) AS count
FROM (
SELECT DISTINCT municipio_geocodigo
FROM "Municipio"."Historico_alerta") AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf IN (%s)
GROUP BY uf
''' % ','.join(ufs_s)
display(pd.read_sql(sql, engine))
In [13]:
# Total de casos notificado e estimados na semana
for w in ['36', '37']:
se = str(datetime.now().isocalendar()[0])
# se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
se += w
sql = '''
SELECT
uf,
COALESCE(SUM(casos), 0) AS casos,
COALESCE(SUM(casos_est), 0) AS casos_est
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf IN (%s) AND "SE" = %s
GROUP BY uf
''' % (','.join(ufs_s), se)
display(pd.read_sql(sql, engine))
In [14]:
# Previsão de casos para as próximas semanas
# TODO: atualmente está igual à semana atual
for w in ['36', '37']:
for uf in ufs:
se = str(datetime.now().isocalendar()[0])
# se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
se += w
sql = '''
SELECT SUM(casos) AS casos, SUM(casos_est) AS casos_est
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf='%s' AND "SE" = %s
''' % (uf, se)
print('\n', uf, se)
display(pd.read_sql(sql, engine))
In [15]:
# Variação em relação à semana anterior
se1 = str(datetime.now().isocalendar()[0])
# se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
se1 += '36'
se2 = str(datetime.now().isocalendar()[0])
# se += str(datetime.now().isocalendar()[1]).rjust(2, '0')
se2 += '37'
sql = '''
SELECT
uf,
(SUM(alerta.casos)-SUM(alerta_passado.casos)) AS casos,
(SUM(alerta.casos_est)-SUM(alerta_passado.casos_est)) AS casos_est
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Municipio"."Historico_alerta" AS alerta_passado
ON (
alerta.municipio_geocodigo = alerta_passado.municipio_geocodigo
AND alerta."SE"=%s
AND alerta_passado."SE"=%s)
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf IN (%s)
GROUP BY uf
''' % (se2, se1, ','.join(ufs_s))
display(pd.read_sql(sql, engine))
In [16]:
# Distribuição por tipo de doença
uf = 'Paraná'
sql = '''
SELECT *
FROM "Municipio"."Notificacao" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
INNER JOIN "Dengue_global"."CID10" AS cid10
ON alerta.cid10_codigo = cid10.codigo
WHERE uf='{}' AND se_notif=36 AND ano_notif=2016
LIMIT 2
'''.format(uf)
display(pd.read_sql(sql, engine))
In [17]:
# Distribuição por gênero
uf = 'Paraná'
sql = '''
SELECT *
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
LIMIT 2
'''
display(pd.read_sql(sql, engine))
In [18]:
# Select the last alert level for each city in a specific state
uf = 'Paraná'
sql = '''
SELECT
hist_alert.id,
hist_alert.municipio_geocodigo,
municipio.nome,
hist_alert."data_iniSE",
(hist_alert.nivel-1) AS level_alert
FROM
"Municipio"."Historico_alerta" AS hist_alert
INNER JOIN (
SELECT geocodigo, MAX("data_iniSE") AS "data_iniSE"
FROM
"Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY geocodigo
) AS recent_alert ON (
recent_alert.geocodigo=hist_alert.municipio_geocodigo
AND recent_alert."data_iniSE"=hist_alert."data_iniSE"
) INNER JOIN "Dengue_global"."Municipio" AS municipio ON (
hist_alert.municipio_geocodigo = municipio.geocodigo
)
'''.format(uf)
df = pd.read_sql(sql, engine, 'id')
mun_dict = dict(df[['municipio_geocodigo', 'nome']].values)
str(mun_dict)[:80]
alerts = dict(df[['municipio_geocodigo', 'level_alert']].values)
str(alerts)[:80]
Out[18]:
In [19]:
len(alerts.keys())
Out[19]:
In [20]:
# Select the 12 last alert level for each city in a specific state
geo_ids = list(alerts.keys())
sql_template = '''(
SELECT municipio_geocodigo, "data_iniSE", casos_est, id
FROM "Municipio"."Historico_alerta"
WHERE municipio_geocodigo={}
ORDER BY "data_iniSE" DESC
LIMIT 12)'''
sql = ' UNION '.join([
sql_template.format(gid) for gid in geo_ids
]) + ' ORDER BY municipio_geocodigo, "data_iniSE"'
df_case_series = pd.read_sql(sql, engine, 'id')
case_series_tail_12 = {
k: v.casos_est.values
for k, v in df_case_series.groupby(by='municipio_geocodigo')
}
case_series_tail_12[list(case_series_tail_12.keys())[0]]
Out[20]:
In [35]:
# geo_id = 4106902 # Curitiba
uf = 'Paraná'
sql = '''
SELECT
dt_notific,
count(dt_notific) AS Casos
FROM "Municipio"."Notificacao" AS notif
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON notif.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY "dt_notific"
ORDER BY "dt_notific"
'''.format(uf)
df_alert_period = pd.read_sql(sql, engine, index_col='dt_notific')
df_alert_period.head()
display(df_alert_period.head())
df_alert_period.plot()
plt.show()
In [23]:
uf = 'Paraná'
sql = '''
SELECT
"data_iniSE",
SUM(casos) AS Casos
FROM "Municipio"."Historico_alerta" AS alerta
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON alerta.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY "data_iniSE"
ORDER BY "data_iniSE"
'''.format(uf)
df_alert_period = pd.read_sql(sql, engine, index_col='data_iniSE')
df_alert_period.head()
Out[23]:
In [24]:
uf = 'Paraná'
sql = '''
SELECT
(CASE COALESCE(cs_sexo, 'Não informado')
WHEN 'M' THEN 'Homem'
WHEN 'F' THEN 'Mulher'
ELSE 'Não informado'
END
) AS gender,
COUNT(id) AS casos
FROM
"Municipio"."Notificacao" AS notif
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON notif.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
GROUP BY cs_sexo;
'''.format(uf)
df_sex_dist = pd.read_sql(sql, engine, 'gender')
display(df_sex_dist)
df_sex_dist.plot(kind='bar')
plt.xticks(rotation=45)
plt.show()
In [25]:
sql = '''
SELECT * FROM "Dengue_global"."CID10"
WHERE codigo IN ({})
'''.format(','.join(["'{}'".format(k) for k in dbdata.CID10.values()]))
df_cid10 = pd.read_sql(sql, engine, 'codigo')
df_cid10
Out[25]:
In [40]:
uf = 'Paraná'
sql = '''
SELECT
COALESCE(cid10.nome, 'Não informado') AS nome,
count(id) AS casos
FROM
"Municipio"."Notificacao" AS notif
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON notif.municipio_geocodigo = municipio.geocodigo
LEFT JOIN "Dengue_global"."CID10" AS cid10
ON notif.cid10_codigo=cid10.codigo
WHERE uf='{}' AND (cid10.codigo IN ({}) OR notif.cid10_codigo IS NULL)
GROUP BY cid10.nome;
'''.format(uf, ','.join(["'{}'".format(k) for k in dbdata.CID10.values()]))
df_disease_dist = pd.read_sql(sql, engine)
In [41]:
dfs = [df_disease_dist]
for k in df_cid10.nome.values:
if not k in df_disease_dist.nome.values:
dfs.append(pd.DataFrame({'nome': [k], 'casos': [0]}))
In [42]:
df_disease_dist = pd.concat(dfs)
In [43]:
display(df_disease_dist)
df_disease_dist.set_index('nome', drop=True).plot(kind='bar')
plt.xticks(rotation=45)
plt.show()
In [30]:
uf = 'Paraná'
sql = '''
SELECT age, count(age) AS cases
FROM (
SELECT
CASE
WHEN nu_idade_n BETWEEN 4000 AND 4004 THEN '0-4'
WHEN nu_idade_n BETWEEN 4005 AND 4009 THEN '5-9'
WHEN nu_idade_n BETWEEN 4010 AND 4019 THEN '10-19'
WHEN nu_idade_n BETWEEN 4020 AND 4029 THEN '20-29'
WHEN nu_idade_n BETWEEN 4030 AND 4039 THEN '30-39'
WHEN nu_idade_n BETWEEN 4040 AND 4049 THEN '40-49'
WHEN nu_idade_n BETWEEN 4050 AND 4059 THEN '50-59'
WHEN nu_idade_n >=4060 THEN '60+'
ELSE 'Não informado'
END AS age
FROM
"Municipio"."Notificacao" AS notif
INNER JOIN "Dengue_global"."Municipio" AS municipio
ON notif.municipio_geocodigo = municipio.geocodigo
WHERE uf='{}'
) AS TB
GROUP BY age
ORDER BY age
'''.format(uf)
df_age_dist = pd.read_sql(sql, engine, 'age')
display(df_age_dist)
df_age_dist.plot(kind='bar')
plt.xticks(rotation=45)
plt.show()