This document demonstrates how the tableone package can be used to create a table of summary statistics for a patient cohort selected from MIMIC-III.
In [1]:
# Import libraries
from tableone import TableOne
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
%matplotlib inline
plt.style.use('ggplot')
In [2]:
# Create a database connection
user = 'postgres'
host = 'localhost'
dbname = 'mimic'
schema = 'mimiciii_demo'
In [3]:
# Connect to the database
con = psycopg2.connect(dbname=dbname, user=user, host=host,
password=getpass.getpass(prompt='Password:'.format(user)))
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema))
In [4]:
# Run query and assign the results to a Pandas DataFrame
# Get first admission for each patient
query = \
"""
WITH admit AS (
SELECT p.gender,
ROUND( (CAST(EXTRACT(epoch FROM a.dischtime - a.admittime)/(60*60*24) AS numeric)), 4) AS los_hospital,
ROUND( (CAST(EXTRACT(epoch FROM a.admittime - p.dob)/(60*60*24*365.242) AS numeric)), 4) AS age,
DENSE_RANK() OVER (PARTITION BY a.subject_id ORDER BY a.admittime) AS admission_seq,
a.ethnicity, a.admission_type, a.insurance, a.religion, a.marital_status, a.hospital_expire_flag
FROM patients p
LEFT JOIN admissions a
ON p.subject_id = a.subject_id)
SELECT *
FROM admit
WHERE admission_seq = 1;
"""
data = pd.read_sql_query(query,con)
In [ ]:
data.head()
In [6]:
# Columns to include in the summary table
columns = ['gender', 'los_hospital', 'age', 'ethnicity','admission_type', 'insurance',
'religion', 'marital_status','hospital_expire_flag']
# List of categorical variables
categorical = ['gender', 'ethnicity', 'insurance', 'religion', 'marital_status',
'hospital_expire_flag']
# Group the data
groupby = 'admission_type'
# Display the top n number of categorical variables
limit = 3
# Compute p values
pval = False
# Display a count of null values
isnull = False
t = TableOne(data, columns=columns, categorical=categorical,
groupby=groupby, limit=limit, pval=pval, isnull=isnull)
t.tableone
Out[6]:
In [ ]: