admissionDx

The following columns are available:

admitDxName
admitDxText

We recommend configuring the config.ini file to allow for connection to the database without specifying your password each time.



In [1]:

    
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega

# for configuring connection 
from configobj import ConfigObj
import os

%matplotlib inline









    



/home/alistairewj/.local/lib/python3.5/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.
  """)



In [2]:

    
# Create a database connection using settings from config file
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   host=conn_info["sqlhost"],
                                   port=conn_info["sqlport"],
                                   user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(dbname=conn_info["dbname"],
                               host=conn_info["sqlhost"],
                               port=conn_info["sqlport"],
                               user=conn_info["sqluser"],
                               password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'









    



Database: eicu
Username: alistairewj

Examine a single patient



In [3]:

    
patientunitstayid = 2704494



In [4]:

    
query = query_schema + """
select *
from admissiondx
where patientunitstayid = {}
""".format(patientunitstayid)

df = pd.read_sql_query(query, con)
df.head()









    Out[4]:







  
    
      
      patientunitstayid
      admissiondxid
      admitdxenteredyear
      admitdxenteredtime24
      admitdxenteredtime
      admitdxenteredoffset
      admitdxpath
      admitdxname
      admitdxtext
    
  
  
    
      0
      2704494
      7981672
      2014
      04:45:42
      morning
      20
      admission diagnosis|Non-operative Organ System...
      Hematology
      Hematology
    
    
      1
      2704494
      7981671
      2014
      04:45:42
      morning
      20
      admission diagnosis|Was the patient admitted f...
      No
      No
    
    
      2
      2704494
      7981673
      2014
      04:45:42
      morning
      20
      admission diagnosis|All Diagnosis|Non-operativ...
      Thrombocytopenia
      Thrombocytopenia

We can see there are only 3 records for this patient. Let's examine the admitdxpath, admitdxname, and admitdxtext values in detail.



In [5]:

    
for i, row in df.iterrows():
    print('Row {}'.format(i+1))
    for c in ['admitdxpath', 'admitdxname', 'admitdxtext']:
        print('  {}: {}'.format(c, row[c]))









    



Row 1
  admitdxpath: admission diagnosis|Non-operative Organ Systems|Organ System|Hematology
  admitdxname: Hematology
  admitdxtext: Hematology
Row 2
  admitdxpath: admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|No
  admitdxname: No
  admitdxtext: No
Row 3
  admitdxpath: admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Hematology|Thrombocytopenia
  admitdxname: Thrombocytopenia
  admitdxtext: Thrombocytopenia

We can see that these rows are providing us: (1) the APACHE diagnosis, (2) auxiliary admission diagnosis information. Row 1 informs us of the organ system (Hematology), row 2 informs us that the patient was not admitted to/from the operating room (O.R.), and row 3 gives us the full APACHE IV admission diagnosis (thrombocytopenia).

Summarize diagnoses



In [6]:

    
query = query_schema + """
select admitdxpath, count(*) as numobs
from admissiondx
group by admitdxpath
order by numobs desc
"""

df_grp = pd.read_sql_query(query, con)
for i, row in df_grp.head(n=10).iterrows():
    print('{:6d}   {}'.format(row['numobs'], row['admitdxpath']))









    



143620   admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|No
 63930   admission diagnosis|Non-operative Organ Systems|Organ System|Cardiovascular
 34243   admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|Yes
 30577   admission diagnosis|Elective|Yes
 25750   admission diagnosis|Non-operative Organ Systems|Organ System|Neurologic
 23230   admission diagnosis|Non-operative Organ Systems|Organ System|Respiratory
 15630   admission diagnosis|Operative Organ Systems|Organ System|Cardiovascular
 12758   admission diagnosis|Additional APACHE  Information|Thrombolytic Therapy received within 24 hours|No
 11830   admission diagnosis|Non-operative Organ Systems|Organ System|Gastrointestinal
  8862   admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, pulmonary

Here we can see the most popular information present in the table: admission from OR, admission for elective surgery, popular body systems, and popular diagnoses.

Patients missing data

Patients with APACHE scores almost always have a diagnosis in the admissiondx table.



In [7]:

    
query = query_schema + """
with dx as
(
select distinct patientunitstayid from admissiondx
where admitdxpath like '%|All Diagnosis|%'
)
, apv as
(
select patientunitstayid
from apachepatientresult
where apacheversion = 'IVa'
and apachescore <> '-1'
)
select count(pt.patientunitstayid) as n
, count(dx.patientunitstayid) as n_with_dx
, count(apv.patientunitstayid) as n_with_apachepred
, sum(
case when apv.patientunitstayid is not null
and dx.patientunitstayid is null
then 1 else 0 end) as n_pred_with_no_dx
from patient pt
left join dx
on pt.patientunitstayid = dx.patientunitstayid
left join apv
on pt.patientunitstayid = apv.patientunitstayid
"""

df_ct = pd.read_sql_query(query, con)
df_ct









    Out[7]:







  
    
      
      n
      n_with_dx
      n_with_apachepred
      n_pred_with_no_dx
    
  
  
    
      0
      200859
      177863
      146696
      389

Identifying patients with a specific admission diagnosis

Let's look for patients who were admitted with sepsis. First, we search for these diagnoses to manually curate them.



In [8]:

    
import re
# remove prefixes from admission dx strings for simplicity
def remove_prefix(text, prefix):
    return re.sub(r'^{0}'.format(re.escape(prefix)), '', text)



In [9]:

    
dx = 'sepsis'
query = query_schema + """
select 
  admitdxpath
  , count(*) as numobs
  , max(case when lower(admitdxpath) like '%{}%' then 1 else 0 end) as valid_dx
from admissiondx
-- isolate to just diagnoses (not auxiliary info like elective admission)
where admitdxpath like '%|All Diagnosis|%'
group by admitdxpath
order by numobs desc
""".format(dx)

df_dx = pd.read_sql_query(query, con)
print('{} diagnoses in total.'.format(df_dx.shape[0]))
print('{} contain the dx string {}.'.format(df_dx['valid_dx'].sum(), dx))

# trim some of the text from the admitdxpath
df_dx['dx'] = df_dx['admitdxpath'].apply(lambda x: remove_prefix(x, 'admission diagnosis|All Diagnosis|'))

# add operative/non-operative
df_dx['op'] = df_dx['dx'].apply(lambda x: x[0:13] == 'Non-operative')

# strip operative/non-operative text
df_dx['dx'] = df_dx['dx'].apply(lambda x: remove_prefix(x, 'Non-operative|Diagnosis|'))
df_dx['dx'] = df_dx['dx'].apply(lambda x: remove_prefix(x, 'Operative|Diagnosis|'))

df_dx[['dx','numobs','valid_dx']].head()









    



426 diagnoses in total.
7 contain the dx string sepsis.






    Out[9]:







  
    
      
      dx
      numobs
      valid_dx
    
  
  
    
      0
      Cardiovascular|Sepsis, pulmonary
      8862
      1
    
    
      1
      Cardiovascular|Infarction, acute myocardial (MI)
      7228
      0
    
    
      2
      Neurology|CVA, cerebrovascular accident/stroke
      6647
      0
    
    
      3
      Cardiovascular|CHF, congestive heart failure
      6617
      0
    
    
      4
      Cardiovascular|Sepsis, renal/UTI (including bl...
      5273
      1

Since there are only 426 diagnoses, we can manually review and confirm that we cover all possible sepsis diagnoses. However, even still, keep in mind that these diagnoses are chosen according to the APACHE scoring criteria. For example, if a patient was admitted with sepsis secondary to a surgery, then the admission diagnosis would need to be the surgery they were originally hospitalized for. Thus we can consider these diagnoses specific, but not necessarily sensitive.



In [12]:

    
sepsis_dx = df_dx.loc[df_dx['valid_dx']==1, 'admitdxpath'].values
sepsis_dx = ["'" + x + "'" for x in sepsis_dx]

print('Using the following sepsis diagnoses:')
print('{}'.format('\n'.join(sepsis_dx)))

sepsis_dx = ','.join(sepsis_dx)

print()
query = query_schema + """
select 
  patientunitstayid, admitdxpath
from admissiondx
where admitdxpath in
(
{}
)
""".format(sepsis_dx)

df = pd.read_sql_query(query, con)
print('{} admissions found with an admission diagnosis of {}.'.format(df.shape[0], dx))









    



Using the following sepsis diagnoses:
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, pulmonary'
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, renal/UTI (including bladder)'
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, GI'
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, unknown'
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, cutaneous/soft tissue'
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, other'
'admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Cardiovascular|Sepsis, gynecologic'

23136 admissions found with an admission diagnosis of sepsis.

	patientunitstayid	admissiondxid	admitdxenteredyear	admitdxenteredtime24	admitdxenteredtime	admitdxenteredoffset	admitdxpath	admitdxname	admitdxtext
0	2704494	7981672	2014	04:45:42	morning	20	admission diagnosis\|Non-operative Organ System...	Hematology	Hematology
1	2704494	7981671	2014	04:45:42	morning	20	admission diagnosis\|Was the patient admitted f...	No	No
2	2704494	7981673	2014	04:45:42	morning	20	admission diagnosis\|All Diagnosis\|Non-operativ...	Thrombocytopenia	Thrombocytopenia

	dx	numobs	valid_dx
0	Cardiovascular\|Sepsis, pulmonary	8862	1
1	Cardiovascular\|Infarction, acute myocardial (MI)	7228	0
2	Neurology\|CVA, cerebrovascular accident/stroke	6647	0
3	Cardiovascular\|CHF, congestive heart failure	6617	0
4	Cardiovascular\|Sepsis, renal/UTI (including bl...	5273	1