Provides information related a patient’s relevant past medical history. Providing detailed past history is not common, but items such as AIDS, Cirrhosis of the Liver, Hepatic Failure, Chronic Renal Failure, Transplant, and Pre-existing Cancers / immunosuppression are more reliable because of their importance in severity outcome scoring.
In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
# for configuring connection
from configobj import ConfigObj
import os
%matplotlib inline
In [2]:
# Create a database connection using settings from config file
config='../db/config.ini'
# connection info
conn_info = dict()
if os.path.isfile(config):
config = ConfigObj(config)
conn_info["sqluser"] = config['username']
conn_info["sqlpass"] = config['password']
conn_info["sqlhost"] = config['host']
conn_info["sqlport"] = config['port']
conn_info["dbname"] = config['dbname']
conn_info["schema_name"] = config['schema_name']
else:
conn_info["sqluser"] = 'postgres'
conn_info["sqlpass"] = ''
conn_info["sqlhost"] = 'localhost'
conn_info["sqlport"] = 5432
conn_info["dbname"] = 'eicu'
conn_info["schema_name"] = 'public,eicu_crd'
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '':
# try connecting without password, i.e. peer or OS authentication
try:
if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
con = psycopg2.connect(dbname=conn_info["dbname"],
user=conn_info["sqluser"])
else:
con = psycopg2.connect(dbname=conn_info["dbname"],
host=conn_info["sqlhost"],
port=conn_info["sqlport"],
user=conn_info["sqluser"])
except:
conn_info["sqlpass"] = getpass.getpass('Password: ')
con = psycopg2.connect(dbname=conn_info["dbname"],
host=conn_info["sqlhost"],
port=conn_info["sqlport"],
user=conn_info["sqluser"],
password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'
In [3]:
patientunitstayid = 141168
In [4]:
query = query_schema + """
select *
from pasthistory
where patientunitstayid = {}
order by pasthistoryoffset
""".format(patientunitstayid)
df = pd.read_sql_query(query, con)
df.head()
Out[4]:
We can make a few observations:
pasthistorypath is a slash delimited (/) hierarchical categorization of the past history recordedpasthistoryvalue and pasthistoryvaluetext are often identicalpasthistoryoffset is the time of the condition, while pasthistoryenteredoffset is when it was documented, though from above it appears the pasthistoryoffset is not necessarily the start time of the conditionLet's look for patients who were admitted with a past history of COPD.
In [5]:
dx = 'COPD'
query = query_schema + """
select
pasthistoryvalue, count(*) as n
from pasthistory
where pasthistoryvalue ilike '%{}%'
group by pasthistoryvalue
""".format(dx)
df_copd = pd.read_sql_query(query, con)
df_copd
Out[5]:
In [6]:
dx = 'COPD'
query = query_schema + """
select
patientunitstayid, count(*) as n
from pasthistory
where pasthistoryvalue ilike '%{}%'
group by patientunitstayid
""".format(dx)
df_copd = pd.read_sql_query(query, con)
print('{} unit stays with {}.'.format(df_copd.shape[0], dx))
In [7]:
query = query_schema + """
with t as
(
select distinct patientunitstayid
from pasthistory
)
select
pt.hospitalid
, count(distinct pt.patientunitstayid) as number_of_patients
, count(distinct t.patientunitstayid) as number_of_patients_with_tbl
from patient pt
left join t
on pt.patientunitstayid = t.patientunitstayid
group by pt.hospitalid
""".format(patientunitstayid)
df = pd.read_sql_query(query, con)
df['data completion'] = df['number_of_patients_with_tbl'] / df['number_of_patients'] * 100.0
df.sort_values('number_of_patients_with_tbl', ascending=False, inplace=True)
df.head(n=10)
Out[7]:
In [8]:
df[['data completion']].vgplot.hist(bins=10,
var_name='Number of hospitals',
value_name='Percent of patients with data')
The majority of hospitals have data for the pasthistory table, again likely due to its importance for certain severity of illness scoring systems.