In [1]:
from sqlalchemy import create_engine, text
from AlertaDengue.settings import (
PSQL_DB, PSQL_HOST, PSQL_PASSWORD, PSQL_USER
)
import pandas as pd
import os
In [2]:
# Tables whose rows are NOT copied into the demo database — the copy loop
# below `continue`s on these. Their structure still exists in the demo DB
# because the schema-only pg_dump (-s) recreates every table.
tb_no_data = ['Notificacao', 'historico_casos', 'Tweet']
In [3]:
# Schemas whose tables should be copied into the demo database.
schemas = ['Dengue_global', 'forecast', 'Municipio']

# Query listing every table in the target schemas. The IN (...) list is
# built from the hard-coded `schemas` constant above, so this string
# interpolation is not an injection risk.
sql_show_tables = '''
SELECT table_schema,table_name
FROM information_schema.tables
WHERE table_schema IN ({})
ORDER BY table_schema,table_name;
'''.format(','.join("'{}'".format(name) for name in schemas))
In [4]:
# Engine for the production database, built from the project settings
# imported at the top of the notebook.
production_url = "postgresql://{}:{}@{}/{}".format(
    PSQL_USER, PSQL_PASSWORD, PSQL_HOST, PSQL_DB
)
engine = create_engine(production_url)
In [5]:
# Fetch and preview the (table_schema, table_name) listing.
# NOTE: `df` is module-level state — the copy loop further down slices it
# per schema, so the name must stay `df`.
with engine.connect() as connection:
    df = pd.read_sql(sql_show_tables, con=connection)
    print(df)
In [6]:
# Export the database names into the environment so the `!` shell cells
# below ($PSQL_DB, $PSQL_DB_DEMO) can reference them.
os.environ.update({
    'PSQL_DB': PSQL_DB,
    'PSQL_DB_DEMO': 'ALERTADEMO',
})
In [7]:
# Dump the production database structure only (-s: schema only,
# -O: no ownership, -x: no privileges) to a temp file for the demo DB.
!pg_dump -s -O -x $PSQL_DB > /tmp/alertademo.sql
In [8]:
!dropdb $PSQL_DB_DEMO
In [9]:
# Create an empty demo database to receive the schema dump.
!createdb $PSQL_DB_DEMO
In [10]:
# Load the schema-only dump into the demo database.
!psql -d $PSQL_DB_DEMO < /tmp/alertademo.sql
In [11]:
# Engine for the demo database. Read the database name from the
# PSQL_DB_DEMO environment variable set earlier instead of repeating the
# 'ALERTADEMO' literal, so the name lives in exactly one place.
engine_demo = create_engine("postgresql://{}:{}@{}/{}".format(
    PSQL_USER,
    PSQL_PASSWORD,
    PSQL_HOST,
    os.environ['PSQL_DB_DEMO'],
))
In [12]:
# PostgreSQL date/time column types; tables containing any of these get a
# WHERE clause (built in the loop below) restricting the copy to recent rows.
dt_types = [
    'timestamp with time zone',
    'date',
    'timestamp without time zone',
]

# Accumulates error messages from table copies that fail.
log = ''
In [13]:
# Copy up to 10,000 rows per table from production into the demo database.
# Tables in `tb_no_data` are skipped (structure alone was restored by the
# schema-only dump); for tables that have date/time columns, only rows
# from the last 2 years are copied.
with engine_demo.connect() as conn_demo:
    for schema in schemas:
        # text() wraps the textual DDL so this also works under
        # SQLAlchemy 2.x, where Connection.execute() rejects bare strings.
        # Schema names come from the hard-coded `schemas` list, so the
        # interpolation is not an injection risk.
        conn_demo.execute(text('CREATE SCHEMA IF NOT EXISTS "%s";' % schema))
        df_schema = df[df.table_schema == schema]
        with engine.connect() as conn:
            for _, row in df_schema.iterrows():
                if row.table_name in tb_no_data:
                    continue
                # Column metadata for this table, used to find
                # date/time fields (names come from information_schema).
                sql = '''
                SELECT *
                FROM information_schema.columns
                WHERE table_schema = '%s'
                AND table_name = '%s'
                ''' % (row.table_schema, row.table_name)
                df_tb = pd.read_sql(sql, con=conn)
                df_dt = df_tb[df_tb.data_type.isin(dt_types)]
                # Restrict the copy to the last 2 years on every
                # date/time column the table has.
                sql_where = ''
                if not df_dt.empty:
                    sql_where = ' WHERE '
                    sql_where += ' AND '.join([
                        "\"%s\" > (NOW() - INTERVAL '2 year')" % field
                        for field in df_dt.column_name
                    ])
                sql = 'SELECT * FROM "%s"."%s" ' % (
                    row.table_schema, row.table_name
                )
                sql += sql_where
                sql += ' LIMIT 10000;'
                df_tb = pd.read_sql(sql, con=conn)
                print(
                    row.table_schema, row.table_name, df_tb.shape,
                    end=' ... '
                )
                try:
                    df_tb.to_sql(
                        row.table_name, con=conn_demo,
                        schema=row.table_schema, if_exists='append',
                        index=False
                    )
                    print('OK')
                except Exception as e:
                    # Tag each error with the table it came from and a
                    # newline separator so `log` is readable afterwards
                    # (the original concatenated bare messages).
                    log += '%s.%s: %s\n' % (
                        row.table_schema, row.table_name, e
                    )
                    print('FAILED')
In [15]:
# Dump the populated demo database (schema + data this time; still no
# ownership/privileges) to its final SQL file.
!pg_dump -O -x $PSQL_DB_DEMO > /tmp/alertademo_data.sql