In [1]:
# Import libraries
from __future__ import print_function
import numpy as np
import pandas as pd
import psycopg2
import socket
import sys
import os
import getpass
from collections import OrderedDict
import matplotlib
import matplotlib.pyplot as plt
# colours for prettier plots
import colorsys
def gg_color_hue(n):
hues = np.linspace(15, 375, n)
hsv_tuples = [(x*1.0/360.0, 0.5, 0.8) for x in hues]
rgb_tuples = map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)
return rgb_tuples
marker = ['v','o','d','^','s','o','+']
ls = ['-','-','-','-','-','s','--','--']
from IPython.display import display, HTML
# plot settings
%matplotlib inline
plt.style.use('ggplot')
font = {'size' : 20}
matplotlib.rc('font', **font)
In [2]:
sqluser = getpass.getuser()
print('Using username {}'.format(sqluser))
dbname = 'mimic'
schema_name = 'mimiciii'
query_schema = 'SET search_path to public,' + schema_name + ';'
# Connect to local postgres version of mimic
con = psycopg2.connect(dbname=dbname, user=sqluser)
print('Connected to postgres {}.{}.{}!'.format(int(con.server_version/10000),
int((con.server_version - int(con.server_version/10000)*10000)/100),
int(con.server_version - int(con.server_version/100)*100)))
In [3]:
query = query_schema + """
SELECT drug_name_generic, drug_name_poe, drug, count(*) as numobs
FROM prescriptions
WHERE lower(drug) LIKE '%vanco%'
OR lower(drug_name_generic) LIKE '%vanco%'
OR lower(drug_name_poe) LIKE '%vanco%'
GROUP BY drug, drug_name_generic, drug_name_poe
ORDER BY drug_name_generic, drug_name_poe, drug;
"""
df = pd.read_sql_query(query, con)
df
Out[3]:
The following will be deleted as we are only concerned with vancomycin administered intravenously:
From above we keep ...
drug in
(
'NEO*IV*Vancomycin', 'Vancocin', 'Vancomycin',
'Vancomycin ', 'Vancomycin Antibiotic Lock',
'Vancomycin Desensitization', 'Vancomycin HCl'
)
Next, we determine which itemids from labevents to keep.
In [22]:
query = query_schema + """
SELECT *
FROM d_labitems
WHERE lower(label) like '%vanco%'
"""
pd.read_sql_query(query, con)
Out[22]:
From above, we keep everything!
Next, we check which itemids from chartevents to keep.
In [26]:
query = query_schema + """
SELECT *
FROM d_items
WHERE lower(label) like '%vanco%'
AND dbsource = 'metavision'
"""
pd.read_sql_query(query, con)
Out[26]:
In carevue, Alistair said they don't record vancomycin administration time so we will only consider data from metavision.
We also only care about trough values and vancomycin administration. So from above we keep ...
itemid in
(
227455, 225798, 226064
)
In [4]:
def get_data_for_subject(subject_id, query_schema, con):
# lab data
query = query_schema + """
select
subject_id, hadm_id, di.itemid, di.label
, charttime, value, valuenum, valueuom, flag
from labevents le
inner join d_labitems di
on le.itemid = di.itemid
where subject_id = {}
and di.itemid = 51009 order by charttime;
""".format(subject_id)
lab = pd.read_sql_query(query, con)
# charted data
query = query_schema + """
select
subject_id, hadm_id, icustay_id
, di.itemid, di.label
, charttime, storetime
, value, valuenum, valueuom
, error, resultstatus
from chartevents ce
inner join d_items di
on ce.itemid = di.itemid
where subject_id = {}
and di.itemid in
(
854, 855, 856, 1354, 5873, 3827, 3828, 2273, 3679, 6261
, 227453, 227454, 227455, 225697, 226064, 226065
)
order by charttime;
""".format(subject_id)
ce = pd.read_sql_query(query, con)
# input data (mv)
query = query_schema + """
select
subject_id, hadm_id, icustay_id
, di.itemid, di.label
, starttime, endtime, storetime
, rate, rateuom
, amount, amountuom
, statusdescription
from inputevents_mv mv
inner join d_items di
on mv.itemid = di.itemid
where subject_id = {}
and di.itemid = 225798
order by starttime;
""".format(subject_id)
imv = pd.read_sql_query(query, con)
# no vanco data in CV
# prescriptions
query = query_schema + """
select *
from prescriptions
where subject_id = {}
and drug in
(
'Vancomycin HCl', 'Vancomycin Oral Liquid', 'NEO*IV*Vancomycin'
, 'Vancomycin', 'Vancomycin Antibiotic Lock' , 'Vancomycin Desensitization'
, 'Vancomycin HCl', 'Vancomycin Intrathecal', ' Vancomycin Intraventricular'
, 'Vancocin'
)
order by startdate;
""".format(subject_id)
pr = pd.read_sql_query(query, con)
# transfers info
query = query_schema + """
select * from transfers
where subject_id = {};
""".format(subject_id)
tr = pd.read_sql_query(query, con)
return lab, ce, imv, pr, tr
The second function combines the individual dataframes into a single dataframe. It does this by combining columns together - so while data in a single column no longer consistently represents the same concept, it is easier to read.
In [7]:
# union the above dataframes together by renaming columns etc
# column names are no longer 100% correct but it's easier to interpret the data this way
def combine_into_single_dataframe(lab, ce, imv, pr, tr):
cols = ['subject_id', 'hadm_id', 'source', 'label', 'charttime', 'endtime', 'value', 'valuenum']
lab['endtime'] = None
lab['source'] = 'lab'
ce['endtime'] = None
ce['source'] = 'chart'
imv['charttime'] = imv['starttime']
imv['value'] = imv['amount'].astype(str)
imv['valuenum'] = imv['rate']
imv['source'] = 'inputs'
pr['charttime'] = pr['startdate']
pr['endtime'] = pr['enddate']
pr['value'] = pr['route']
pr['label'] = pr['drug']
pr['valuenum'] = pr['dose_val_rx']
pr['source'] = 'poe'
# add in the start/end time of ICU
icu_admit = tr.loc[ (~tr['curr_careunit'].isnull()), :].copy()
icu_disch = tr.loc[ (~tr['curr_careunit'].isnull()), :].copy()
icu_admit['source'] = 'icu admit'
icu_disch['source'] = 'icu disch'
icu_admit['endtime'] = None
icu_disch['endtime'] = None
icu_admit['valuenum'] = None
icu_disch['valuenum'] = None
icu_rename_dict = {'icustay_id': 'label', 'curr_careunit': 'value'}
icu_admit.rename( icu_rename_dict, axis=1, inplace=True )
icu_disch.rename( icu_rename_dict, axis=1, inplace=True )
# set charttine - for admission, its intime, for discharge, its outtime
icu_admit.rename({'intime': 'charttime'}, axis=1, inplace=True)
icu_disch.rename({'outtime': 'charttime'}, axis=1, inplace=True)
df = pd.concat([ lab[cols], ce[cols], imv[cols], pr[cols], icu_admit[cols], icu_disch[cols] ],
axis=0, ignore_index=True )
df.sort_values('charttime', ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
return df
In [8]:
# pick the subject
subject_id = 97920
# get data for this subject
lab, ce, imv, pr, tr = get_data_for_subject(subject_id, query_schema, con)
# combine dataframes together
df = combine_into_single_dataframe(lab, ce, imv, pr, tr)
# drop subject/hadm_id
df.drop(['subject_id', 'hadm_id'], axis=1, inplace=True)
# display dataframe
display(HTML(df.to_html().replace('None','')))
The above makes sense - summarizing the flow:
We can try another subject.
In [9]:
# pick the subject
subject_id = 94864
# get data for this subject
lab, ce, imv, pr, tr = get_data_for_subject(subject_id, query_schema, con)
# combine dataframes together
df = combine_into_single_dataframe(lab, ce, imv, pr, tr)
# drop subject/hadm_id
df.drop(['subject_id', 'hadm_id'], axis=1, inplace=True)
# display dataframe
display(HTML(df.to_html().replace('None','')))
The above is mostly sensible - except perhaps for rows 16/17. Here, we can an increase in vancomycin levels, but we don't have an associated dose to explain it.
In [10]:
# pick the subject
subject_id = 357
# get data for this subject
lab, ce, imv, pr, tr = get_data_for_subject(subject_id, query_schema, con)
# combine dataframes together
df = combine_into_single_dataframe(lab, ce, imv, pr, tr)
# drop subject/hadm_id
df.drop(['subject_id', 'hadm_id'], axis=1, inplace=True)
# display dataframe
display(HTML(df.to_html().replace('None','')))
This patient is admitted under the carevue system, and unfortunately we do not have documentation of IV antibiotics for these patients. However, we note that the prescriptions ('poe') appear to match the charted data.
For the last few rows, it is odd that there is no measurement of vancomycin between the 25th-26th.
In [11]:
con.close()