Care plans

Care plans are used by staff to keep track of both current and planned procedures for the patient. Care plans are documented in five tables: carePlanCareProvider, carePlanEOL, carePlanGeneral, carePlanGoal, and carePlanInfectiousDisease.



In [1]:

    
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega

# for configuring connection 
from configobj import ConfigObj
import os

%matplotlib inline









    



/home/alistairewj/.local/lib/python3.5/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.
  """)



In [2]:

    
# Create a database connection using settings from config file
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = 'localhost'
    conn_info["sqlport"] = 5432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == 'localhost') & (conn_info["sqlport"]=='5432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   host=conn_info["sqlhost"],
                                   port=conn_info["sqlport"],
                                   user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(dbname=conn_info["dbname"],
                               host=conn_info["sqlhost"],
                               port=conn_info["sqlport"],
                               user=conn_info["sqluser"],
                               password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'









    



Database: eicu
Username: alistairewj

Summarize data available in each table

Examine data for one patient

Hospitals with data available



In [3]:

    
query = query_schema + """
with cpc as ( select distinct patientunitstayid from careplancareprovider )
, cpeol as ( select distinct patientunitstayid from careplaneol )
, cpge as ( select distinct patientunitstayid from careplangeneral )
, cpgo as ( select distinct patientunitstayid from careplangoal )
, cpid as ( select distinct patientunitstayid from careplaninfectiousdisease )
select 
  pt.hospitalid
  , count(distinct pt.patientunitstayid) as number_of_patients
  , count(distinct cpc.patientunitstayid) as n_cpc
  , count(distinct cpeol.patientunitstayid) as n_cpeol
  , count(distinct cpge.patientunitstayid) as n_cpge
  , count(distinct cpgo.patientunitstayid) as n_cpgo
  , count(distinct cpid.patientunitstayid) as n_cpid
from patient pt
left join cpc
  on pt.patientunitstayid = cpc.patientunitstayid
left join cpeol
  on pt.patientunitstayid = cpeol.patientunitstayid
left join cpge
  on pt.patientunitstayid = cpge.patientunitstayid
left join cpgo
  on pt.patientunitstayid = cpgo.patientunitstayid
left join cpid
  on pt.patientunitstayid = cpid.patientunitstayid
group by pt.hospitalid
""".format()

df = pd.read_sql_query(query, con)
# convert to percent
cols = ['n_cpc', 'n_cpeol', 'n_cpge', 'n_cpgo', 'n_cpid']
for c in cols:
    df[c] = df[c].astype(float) / df['number_of_patients'] * 100.0
    
df.sort_values('number_of_patients', ascending=False, inplace=True)
df.head(n=10)









    Out[3]:







  
    
      
      hospitalid
      number_of_patients
      n_cpc
      n_cpeol
      n_cpge
      n_cpgo
      n_cpid
    
  
  
    
      11
      73
      7059
      86.669500
      0.000000
      99.065023
      0.000000
      0.000000
    
    
      54
      167
      6092
      52.954695
      0.082075
      99.753775
      0.000000
      0.016415
    
    
      106
      264
      5237
      96.429253
      0.038190
      98.644262
      0.000000
      0.000000
    
    
      184
      420
      4679
      99.251977
      3.483650
      96.281257
      91.194700
      1.816627
    
    
      58
      176
      4328
      52.356747
      0.023105
      99.745841
      0.000000
      0.000000
    
    
      134
      338
      4277
      97.685293
      0.023381
      99.415478
      0.140285
      0.023381
    
    
      90
      243
      4243
      97.784586
      0.400660
      97.407495
      3.676644
      5.467829
    
    
      71
      199
      4240
      91.910377
      0.259434
      95.353774
      0.306604
      0.141509
    
    
      206
      458
      3701
      98.324777
      0.000000
      99.000270
      0.027020
      0.000000
    
    
      200
      443
      3656
      92.970460
      0.000000
      99.234136
      0.000000
      0.000000



In [5]:

    
# rename columns within the call for clarity in the figure
column_label = {'n_cpc': 'Care provider',
               'n_cpeol': 'EOL',
                'n_cpge': 'General',
                'n_cpgo': 'Goal',
                'n_cpid': 'Infectious disease'}

df[cols].rename(columns=column_label).vgplot.hist(bins=10, stacked=True,
                                    var_name='Number of hospitals',
                                    value_name='Percent of patients with data')

As we can see, the majority of hospitals do not have data for the goal, infectious disease, and EOL tables. Conversely, the care provider and general tables have good coverage.

	hospitalid	number_of_patients	n_cpc	n_cpeol	n_cpge	n_cpgo	n_cpid
11	73	7059	86.669500	0.000000	99.065023	0.000000	0.000000
54	167	6092	52.954695	0.082075	99.753775	0.000000	0.016415
106	264	5237	96.429253	0.038190	98.644262	0.000000	0.000000
184	420	4679	99.251977	3.483650	96.281257	91.194700	1.816627
58	176	4328	52.356747	0.023105	99.745841	0.000000	0.000000
134	338	4277	97.685293	0.023381	99.415478	0.140285	0.023381
90	243	4243	97.784586	0.400660	97.407495	3.676644	5.467829
71	199	4240	91.910377	0.259434	95.353774	0.306604	0.141509
206	458	3701	98.324777	0.000000	99.000270	0.027020	0.000000
200	443	3656	92.970460	0.000000	99.234136	0.000000	0.000000