We first need to import some tools for working with data in Python.
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
%matplotlib inline
In [ ]:
# Connect to the MIMIC database
conn = sqlite3.connect('data/mimicdata.sqlite')
In [ ]:
# Create our test query
test_query = """
SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis
FROM admissions
LIMIT 10;
"""
In [ ]:
# Run the query and assign the results to a variable
test = pd.read_sql_query(test_query,conn)
In [ ]:
# Display the first few rows
test.head()
In [ ]:
query = """
SELECT de.icustay_id
, (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS
, di.label
, de.value
, de.valuenum
, de.uom
FROM chartevents de
INNER join d_items di
ON de.itemid = di.itemid
INNER join icustays ie
ON de.icustay_id = ie.icustay_id
WHERE de.subject_id = 40036
ORDER BY charttime;
"""
ce = pd.read_sql_query(query,conn)
In [ ]:
# Preview the data
# Use 'head' to limit the number of rows returned
ce.head()
ce.LABEL
or alternatively ce['LABEL']
In [ ]:
# Select a single column
ce['LABEL']
[ce.LABEL=='Heart Rate']
In [ ]:
# Select just the heart rate rows using an index
ce[ce.LABEL=='Heart Rate']
In [ ]:
# Which time stamps have a corresponding heart rate measurement?
print ce.index[ce.LABEL=='Heart Rate']
In [ ]:
# Set x equal to the times
x_hr = ce.HOURS[ce.LABEL=='Heart Rate']
# Set y equal to the heart rates
y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']
# Plot time against heart rate
plt.figure(figsize=(14, 6))
plt.plot(x_hr,y_hr)
plt.xlabel('Time',fontsize=16)
plt.ylabel('Heart rate',fontsize=16)
plt.title('Heart rate over time from admission to the intensive care unit')
In [ ]:
# Exercise 1 here
In [ ]:
plt.figure(figsize=(14, 6))
plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'],
ce.VALUENUM[ce.LABEL=='Respiratory Rate'],
'k+', markersize=10, linewidth=4)
plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'],
ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],
'm--')
plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'],
ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],
'm--')
plt.xlabel('Time',fontsize=16)
plt.ylabel('Respiratory rate',fontsize=16)
plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')
plt.ylim(0,55)
In [ ]:
# Display the first few rows of the GCS eye response data
ce[ce.LABEL=='GCS - Eye Opening'].head()
In [ ]:
# Prepare the size of the figure
plt.figure(figsize=(14, 10))
# Set x equal to the times
x_hr = ce.HOURS[ce.LABEL=='Heart Rate']
# Set y equal to the heart rates
y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']
plt.plot(x_hr,y_hr)
plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'],
ce.VALUENUM[ce.LABEL=='Respiratory Rate'],
'k', markersize=6)
# Add a text label to the y-axis
plt.text(-4,155,'GCS - Eye Opening',fontsize=14)
plt.text(-4,150,'GCS - Motor Response',fontsize=14)
plt.text(-4,145,'GCS - Verbal Response',fontsize=14)
# Iterate over list of GCS labels, plotting around 1 in 10 to avoid overlap
for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):
if np.mod(i,6)==0 and i < 65:
plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)
for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):
if np.mod(i,6)==0 and i < 65:
plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)
for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):
if np.mod(i,6)==0 and i < 65:
plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)
plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)
plt.xlabel('Time (hours)',fontsize=16)
plt.ylabel('Heart rate or GCS',fontsize=16)
plt.ylim(10,165)
In [ ]:
# OPTION 1: load outputs from the patient
query = """
select de.icustay_id
, (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS
, di.label
, de.value
, de.valueuom
from outputevents de
inner join icustays ie
on de.icustay_id = ie.icustay_id
inner join d_items di
on de.itemid = di.itemid
where de.subject_id = 40036
order by charttime;
"""
oe = pd.read_sql_query(query,conn)
In [ ]:
oe.head()
In [ ]:
# Prepare the size of the figure
plt.figure(figsize=(14, 10))
plt.title('Fluid output over time')
plt.plot(oe.HOURS,
oe.VALUE.cumsum()/1000,
'ro', markersize=8, label='Output volume, L')
plt.xlim(0,20)
plt.ylim(0,2)
plt.legend()
To provide context for this plot, it would help to include patient input data. This helps to determine the patient's fluid balance, a key indicator in patient health.
In [ ]:
# Load inputs given to the patient (usually intravenously) using the database connection
query = """
select de.icustay_id
, (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START
, (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END
, de.linkorderid
, di.label
, de.amount
, de.amountuom
, de.rate
, de.rateuom
from inputevents_mv de
inner join icustays ie
on de.icustay_id = ie.icustay_id
inner join d_items di
on de.itemid = di.itemid
where de.subject_id = 40036
order by endtime;
"""
ie = pd.read_sql_query(query,conn)
In [ ]:
ie.head()
Note that the column headers are different: we have "HOURS_START" and "HOURS_END". This is because inputs are administered over a fixed period of time.
In [ ]:
ie['LABEL'].unique()
In [ ]:
# Prepare the size of the figure
plt.figure(figsize=(14, 10))
# Plot the cumulative input against the cumulative output
plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'],
ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000,
'go', markersize=8, label='Intake volume, L')
plt.plot(oe.HOURS,
oe.VALUE.cumsum()/1000,
'ro', markersize=8, label='Output volume, L')
plt.title('Fluid balance over time',fontsize=16)
plt.xlabel('Hours',fontsize=16)
plt.ylabel('Volume (litres)',fontsize=16)
# plt.ylim(0,38)
plt.legend()
As the plot shows, the patient's intake tends to be above their output. There are however periods where input and output are almost one to one. One of the biggest challenges of working with ICU data is that context is everything, so let's look at a treatment (Furosemide/Lasix) which we know will affect this graph.
In [ ]:
plt.figure(figsize=(14, 10))
# Plot the cumulative input against the cumulative output
plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'],
ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000,
'go', markersize=8, label='Intake volume, L')
plt.plot(oe.HOURS,
oe.VALUE.cumsum()/1000,
'ro', markersize=8, label='Output volume, L')
# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()
for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):
plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],
ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],
[ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],
ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],
'b-',linewidth=4)
plt.title('Fluid balance over time',fontsize=16)
plt.xlabel('Hours',fontsize=16)
plt.ylabel('Volume (litres)',fontsize=16)
# plt.ylim(0,38)
plt.legend()
In [ ]:
ie['LABEL'].unique()
In [ ]:
# Exercise 2 here
In [ ]:
plt.figure(figsize=(14, 10))
plt.plot(ce.index[ce.LABEL=='Heart Rate'],
ce.VALUENUM[ce.LABEL=='Heart Rate'],
'rx', markersize=8, label='HR')
plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'],
ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'],
'g.', markersize=8, label='O2')
plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'],
ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'],
'bv', markersize=8, label='MAP')
plt.plot(ce.index[ce.LABEL=='Respiratory Rate'],
ce.VALUENUM[ce.LABEL=='Respiratory Rate'],
'k+', markersize=8, label='RR')
plt.title('Vital signs over time from admission')
plt.ylim(0,130)
plt.legend()
Using Pandas 'read_csv function' again, we'll now load the labevents data. This data corresponds to measurements made in a laboratory - usually on a sample of patient blood.
In [ ]:
# OPTION 1: load labevents data using the database connection
query = """
SELECT de.subject_id
, de.charttime
, di.label, de.value, de.valuenum
, de.uom
FROM labevents de
INNER JOIN d_labitems di
ON de.itemid = di.itemid
where de.subject_id = 40036
"""
le = pd.read_sql_query(query,conn)
In [ ]:
# preview the labevents data
le.head()
In [ ]:
# preview the ioevents data
le[le.LABEL=='HEMOGLOBIN']
In [ ]:
plt.figure(figsize=(14, 10))
plt.plot(le.index[le.LABEL=='HEMATOCRIT'],
le.VALUENUM[le.LABEL=='HEMATOCRIT'],
'go', markersize=6, label='Haematocrit')
plt.plot(le.index[le.LABEL=='HEMOGLOBIN'],
le.VALUENUM[le.LABEL=='HEMOGLOBIN'],
'bv', markersize=8, label='Hemoglobin')
plt.title('Laboratory measurements over time from admission')
plt.ylim(0,38)
plt.legend()
In [ ]:
In [ ]:
# load ioevents
ioe = pd.read_csv('data/example_ioevents.csv',index_col='HOURSSINCEADMISSION_START')
In [ ]:
ioe.head()
In [ ]:
plt.figure(figsize=(14, 10))
plt.plot(ioe.index[ioe.LABEL=='Midazolam (Versed)'],
ioe.RATE[ioe.LABEL=='Midazolam (Versed)'],
'go', markersize=6, label='Midazolam (Versed)')
plt.plot(ioe.index[ioe.LABEL=='Propofol'],
ioe.RATE[ioe.LABEL=='Propofol'],
'bv', markersize=8, label='Propofol')
plt.plot(ioe.index[ioe.LABEL=='Fentanyl'],
ioe.RATE[ioe.LABEL=='Fentanyl'],
'k+', markersize=8, label='Fentanyl')
plt.title('IOevents over time from admission')
plt.ylim(0,380)
plt.legend()
In [ ]:
In [ ]:
plt.figure(figsize=(14, 10))
plt.plot(ioe.index[ioe.LABEL=='OR Cryoprecipitate Intake'],
ioe.VALUENUM[ioe.LABEL=='OR Cryoprecipitate Intake'],
'go', markersize=6, label='OR Cryoprecipitate Intake')
plt.plot(ioe.index[ioe.LABEL=='OR Crystalloid Intake'],
ioe.VALUENUM[ioe.LABEL=='OR Crystalloid Intake'],
'bv', markersize=8, label='OR Crystalloid Intake')
plt.plot(ioe.index[ioe.LABEL=='OR FFP Intake'],
ioe.VALUENUM[ioe.LABEL=='OR FFP Intake'],
'k+', markersize=8, label='OR FFP Intake')
plt.plot(ioe.index[ioe.LABEL=='OR Packed RBC Intake'],
ioe.VALUENUM[ioe.LABEL=='OR Packed RBC Intake'],
'k+', markersize=8, label='OR Packed RBC Intake')
plt.plot(ioe.index[ioe.LABEL=='OR Platelet Intake'],
ioe.VALUENUM[ioe.LABEL=='OR Platelet Intake'],
'k+', markersize=8, label='OR Platelet Intake')
plt.title('Blood products administered over time from admission')
plt.legend()
In [ ]:
# insert discharge summary here...