In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
# for configuring connection
from configobj import ConfigObj
import os
%matplotlib inline
In [2]:
# Create a database connection using settings from config file
config='../db/config.ini'
# connection info
conn_info = dict()
if os.path.isfile(config):
config = ConfigObj(config)
conn_info["sqluser"] = config['username']
conn_info["sqlpass"] = config['password']
conn_info["sqlhost"] = config['host']
conn_info["sqlport"] = config['port']
conn_info["dbname"] = config['dbname']
conn_info["schema_name"] = config['schema_name']
else:
conn_info["sqluser"] = 'postgres'
conn_info["sqlpass"] = ''
conn_info["sqlhost"] = 'localhost'
conn_info["sqlport"] = 5432
conn_info["dbname"] = 'eicu'
conn_info["schema_name"] = 'public,eicu_crd'
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '':
# try connecting without password, i.e. peer or OS authentication
try:
if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
con = psycopg2.connect(dbname=conn_info["dbname"],
user=conn_info["sqluser"])
else:
con = psycopg2.connect(dbname=conn_info["dbname"],
host=conn_info["sqlhost"],
port=conn_info["sqlport"],
user=conn_info["sqluser"])
except:
conn_info["sqlpass"] = getpass.getpass('Password: ')
con = psycopg2.connect(dbname=conn_info["dbname"],
host=conn_info["sqlhost"],
port=conn_info["sqlport"],
user=conn_info["sqluser"],
password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'
In [6]:
uniquepid = '002-33870'
query = query_schema + """
select *
from patient
where uniquepid = '{}'
""".format(uniquepid)
df = pd.read_sql_query(query, con)
df.head()
Out[6]:
Here we see two unit stays for a single patient. Note also that both unit stays have the same patienthealthsystemstayid
- this indicates that they occurred within the same hospitalization.
We can see the unitstaytype
was 'admit' for one stay, and 'stepdown/other' for another. Other columns can give us more information.
In [7]:
df[['patientunitstayid', 'wardid', 'unittype', 'unitstaytype', 'hospitaladmitoffset', 'unitdischargeoffset']]
Out[7]:
Note that it's not explicitly obvious which stay occurred first. Earlier stays will be closer to hospital admission, and therefore have a higher hospitaladmitoffset. Above, the stay with a hospitaladmitoffset
of -14 was first (occurring 14 minutes after hospital admission), followed by the next stay with a hospitaladmitoffset
of 22 (which occurred 22 minutes after hospital admission). Practically, we wouldn't consider the first admission a "real" ICU stay, and it's likely an idiosyncrasy of the administration system at this particular hospital. Notice how both rows have the same wardid
.
In [8]:
query = query_schema + """
select age, count(*) as n
from patient
group by age
order by n desc
"""
df = pd.read_sql_query(query, con)
df.head()
Out[8]:
As is common in eICU-CRD, there are a subset of hospitals who routinely utilize this portion of the medical record (and thus have 90-100% data completion), while there are other hospitals who rarely use this interface and thus have poor data completion (0-10%).