Creating summary statistics with tableone

This document demonstrates how the tableone package can be used to create a table of summary statistics for a patient cohort selected from MIMIC-III.

Import the libraries


In [1]:
# Import libraries
from tableone import TableOne
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
%matplotlib inline
plt.style.use('ggplot')

Create the database connection


In [2]:
# Create a database connection
user = 'postgres'
host = 'localhost'
dbname = 'mimic'
schema = 'mimiciii_demo'

In [3]:
# Connect to the database
con = psycopg2.connect(dbname=dbname, user=user, host=host, 
                       password=getpass.getpass(prompt='Password:'.format(user)))
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema))


Password:········

Select data on the first hospital stay


In [4]:
# Run query and assign the results to a Pandas DataFrame
# Get first admission for each patient
query = \
"""
WITH admit AS (
    SELECT p.gender, 
        ROUND( (CAST(EXTRACT(epoch FROM a.dischtime - a.admittime)/(60*60*24) AS numeric)), 4) AS los_hospital,
        ROUND( (CAST(EXTRACT(epoch FROM a.admittime - p.dob)/(60*60*24*365.242) AS numeric)), 4) AS age, 
        DENSE_RANK() OVER (PARTITION BY a.subject_id ORDER BY a.admittime) AS admission_seq,
        a.ethnicity, a.admission_type, a.insurance, a.religion, a.marital_status, a.hospital_expire_flag
    FROM patients p
    LEFT JOIN admissions a
    ON p.subject_id = a.subject_id)
SELECT *
FROM admit
WHERE admission_seq = 1;
"""

data = pd.read_sql_query(query,con)

Display the first few rows of the data


In [ ]:
data.head()

Create Table 1


In [6]:
# Columns to include in the summary table
columns = ['gender', 'los_hospital', 'age', 'ethnicity','admission_type', 'insurance', 
           'religion', 'marital_status','hospital_expire_flag']

# List of categorical variables
categorical = ['gender', 'ethnicity', 'insurance', 'religion', 'marital_status',
           'hospital_expire_flag']

# Group the data
groupby = 'admission_type'

# Display the top n number of categorical variables
limit = 3

# Compute p values
pval = False

# Display a count of null values
isnull = False

t = TableOne(data, columns=columns, categorical=categorical, 
         groupby=groupby, limit=limit, pval=pval, isnull=isnull)

t.tableone


Out[6]:
Grouped by admission_type
ELECTIVE EMERGENCY URGENT
variable level
n 8 90 2
age 74.23 (11.05) 90.01 (68.12) 75.66 (4.29)
ethnicity WHITE 8 (100.0) 65 (72.22) 1 (50.0)
UNKNOWN/NOT SPECIFIED 9 (10.0) 1 (50.0)
BLACK/AFRICAN AMERICAN 6 (6.67)
gender F 5 (62.5) 48 (53.33) 2 (100.0)
M 3 (37.5) 42 (46.67)
hospital_expire_flag 0 8 (100.0) 58 (64.44) 1 (50.0)
1 32 (35.56) 1 (50.0)
insurance Medicare 5 (62.5) 70 (77.78) 1 (50.0)
Private 3 (37.5) 15 (16.67) 1 (50.0)
Medicaid 4 (4.44)
los_hospital 11.67 (11.79) 9.86 (14.43) 6.26 (0.81)
marital_status MARRIED 4 (50.0) 36 (48.0) 1 (50.0)
SINGLE 2 (25.0) 18 (24.0)
WIDOWED 1 (12.5) 13 (17.33)
religion CATHOLIC 6 (75.0) 33 (37.08)
UNOBTAINABLE 16 (17.98)
NOT SPECIFIED 1 (12.5) 14 (15.73)

In [ ]: