We first need to import some tools for working with data in Python.
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
%matplotlib inline
In [ ]:
# Connect to the MIMIC database
conn = sqlite3.connect('data/mimicdata.sqlite')
In [ ]:
# Create our test query
test_query = """
SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis
FROM admissions
"""
In [ ]:
# Run the query and assign the results to a variable
test = pd.read_sql_query(test_query,conn)
In [ ]:
# Display the first few rows
test.head()
In [ ]:
query = """
SELECT de.icustay_id
, (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS
, di.label
, de.value
, de.valuenum
, de.uom
FROM chartevents de
INNER join d_items di
ON de.itemid = di.itemid
INNER join icustays ie
ON de.icustay_id = ie.icustay_id
WHERE de.icustay_id = 252522
ORDER BY charttime;
"""
ce = pd.read_sql_query(query,conn)
# OPTION 2: load chartevents from a CSV file
# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')
In [ ]:
# Preview the data
# Use 'head' to limit the number of rows returned
ce.head()
ce.LABEL
or alternatively ce['LABEL']
In [ ]:
# Select a single column
ce['LABEL']
[ce.LABEL=='Heart Rate']
In [ ]:
# Select just the heart rate rows using an index
ce[ce.LABEL=='Heart Rate']
In [ ]:
# Which time stamps have a corresponding heart rate measurement?
print ce.index[ce.LABEL=='Heart Rate']
In [ ]:
# Set x equal to the times
x_hr = ce.HOURS[ce.LABEL=='Heart Rate']
# Set y equal to the heart rates
y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']
# Plot time against heart rate
plt.figure(figsize=(14, 6))
plt.plot(x_hr,y_hr)
plt.xlabel('Time',fontsize=16)
plt.ylabel('Heart rate',fontsize=16)
plt.title('Heart rate over time from admission to the intensive care unit')
In [ ]:
# Exercise 1 here
In [ ]:
plt.figure(figsize=(14, 6))
plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'],
ce.VALUENUM[ce.LABEL=='Respiratory Rate'],
'k+', markersize=10, linewidth=4)
plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'],
ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],
'm--')
plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'],
ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],
'm--')
plt.xlabel('Time',fontsize=16)
plt.ylabel('Respiratory rate',fontsize=16)
plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')
plt.ylim(0,55)
In [ ]:
# Display the first few rows of the GCS eye response data
ce[ce.LABEL=='GCS - Eye Opening'].head()
In [ ]:
# Prepare the size of the figure
plt.figure(figsize=(18, 10))
# Set x equal to the times
x_hr = ce.HOURS[ce.LABEL=='Heart Rate']
# Set y equal to the heart rates
y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']
plt.plot(x_hr,y_hr)
plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'],
ce.VALUENUM[ce.LABEL=='Respiratory Rate'],
'k', markersize=6)
# Add a text label to the y-axis
plt.text(-20,155,'GCS - Eye Opening',fontsize=14)
plt.text(-20,150,'GCS - Motor Response',fontsize=14)
plt.text(-20,145,'GCS - Verbal Response',fontsize=14)
# Iterate over list of GCS labels, plotting around 1 in 10 to avoid overlap
for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):
if np.mod(i,6)==0 and i < 65:
plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)
for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):
if np.mod(i,6)==0 and i < 65:
plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)
for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):
if np.mod(i,6)==0 and i < 65:
plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)
plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)
plt.xlabel('Time (hours)',fontsize=16)
plt.ylabel('Heart rate or GCS',fontsize=16)
plt.ylim(10,165)
In [ ]:
# OPTION 1: load outputs from the patient
query = """
select de.icustay_id
, (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS
, di.label
, de.value
, de.valueuom
from outputevents de
inner join icustays ie
on de.icustay_id = ie.icustay_id
inner join d_items di
on de.itemid = di.itemid
where de.subject_id = 40080
order by charttime;
"""
oe = pd.read_sql_query(query,conn)
In [ ]:
oe.head()
In [ ]:
plt.figure(figsize=(14, 10))
plt.figure(figsize=(14, 6))
plt.title('Fluid output over time')
plt.plot(oe.HOURS,
oe.VALUE.cumsum()/1000,
'ro', markersize=8, label='Output volume, L')
plt.xlim(0,72)
plt.ylim(0,10)
plt.legend()
To provide necessary context to this plot, it would help to include patient input data. This provides the necessary context to determine a patient's fluid balance - a key indicator in patient health.
In [ ]:
# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection
query = """
select de.icustay_id
, (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START
, (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END
, de.linkorderid
, di.label
, de.amount
, de.amountuom
, de.rate
, de.rateuom
from inputevents_mv de
inner join icustays ie
on de.icustay_id = ie.icustay_id
inner join d_items di
on de.itemid = di.itemid
where de.subject_id = 40080
order by endtime;
"""
ie = pd.read_sql_query(query,conn)
# # OPTION 2: load ioevents using the CSV file with endtime as the index
# ioe = pd.read_csv('inputevents.csv'
# ,header=None
# ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']
# ,parse_dates=True)
In [ ]:
ie.head()
Note that the column headers are different: we have "HOURS_START" and "HOURS_END". This is because inputs are administered over a fixed period of time.
In [ ]:
ie['LABEL'].unique()
In [ ]:
plt.figure(figsize=(14, 10))
# Plot the cumulative input against the cumulative output
plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'],
ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000,
'go', markersize=8, label='Intake volume, L')
plt.plot(oe.HOURS,
oe.VALUE.cumsum()/1000,
'ro', markersize=8, label='Output volume, L')
plt.title('Fluid balance over time',fontsize=16)
plt.xlabel('Hours',fontsize=16)
plt.ylabel('Volume (litres)',fontsize=16)
# plt.ylim(0,38)
plt.legend()
As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph.
In [ ]:
plt.figure(figsize=(14, 10))
# Plot the cumulative input against the cumulative output
plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'],
ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000,
'go', markersize=8, label='Intake volume, L')
plt.plot(oe.HOURS,
oe.VALUE.cumsum()/1000,
'ro', markersize=8, label='Output volume, L')
# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()
for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):
plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],
ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],
[ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],
ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],
'b-',linewidth=4)
plt.title('Fluid balance over time',fontsize=16)
plt.xlabel('Hours',fontsize=16)
plt.ylabel('Volume (litres)',fontsize=16)
# plt.ylim(0,38)
plt.legend()
In [ ]:
ie['LABEL'].unique()
In [ ]:
# Exercise 2 here
In [ ]:
plt.figure(figsize=(14, 10))
plt.plot(ce.index[ce.LABEL=='Heart Rate'],
ce.VALUENUM[ce.LABEL=='Heart Rate'],
'rx', markersize=8, label='HR')
plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'],
ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'],
'g.', markersize=8, label='O2')
plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'],
ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'],
'bv', markersize=8, label='MAP')
plt.plot(ce.index[ce.LABEL=='Respiratory Rate'],
ce.VALUENUM[ce.LABEL=='Respiratory Rate'],
'k+', markersize=8, label='RR')
plt.title('Vital signs over time from admission')
plt.ylim(0,130)
plt.legend()
Using Pandas 'read_csv function' again, we'll now load the labevents data. This data corresponds to measurements made in a laboratory - usually on a sample of patient blood.
In [ ]:
# OPTION 1: load labevents data using the database connection
query = """
SELECT de.subject_id
, de.charttime
, di.label, de.value, de.valuenum
, de.uom
FROM labevents de
INNER JOIN d_labitems di
ON de.itemid = di.itemid
where de.subject_id = 40080
"""
le = pd.read_sql_query(query,conn)
# OPTION 2: load labevents from the CSV file
# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')
In [ ]:
# preview the labevents data
le.head()
In [ ]:
# preview the ioevents data
le[le.LABEL=='HEMOGLOBIN']
In [ ]:
plt.figure(figsize=(14, 10))
plt.plot(le.index[le.LABEL=='HEMATOCRIT'],
le.VALUENUM[le.LABEL=='HEMATOCRIT'],
'go', markersize=6, label='Haematocrit')
plt.plot(le.index[le.LABEL=='HEMOGLOBIN'],
le.VALUENUM[le.LABEL=='HEMOGLOBIN'],
'bv', markersize=8, label='Hemoglobin')
plt.title('Laboratory measurements over time from admission')
plt.ylim(0,38)
plt.legend()