Creating summary statistics with tableone

This document demonstrates how the tableone package can be used to create a table of summary statistics for a patient cohort selected from MIMIC-III.

Import the libraries



In [1]:

    
# Import libraries
from tableone import TableOne
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
%matplotlib inline
plt.style.use('ggplot')

Create the database connection



In [2]:

    
# Create a database connection
user = 'postgres'
host = 'localhost'
dbname = 'mimic'
schema = 'mimiciii_demo'



In [3]:

    
# Connect to the database
con = psycopg2.connect(dbname=dbname, user=user, host=host, 
                       password=getpass.getpass(prompt='Password:'.format(user)))
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema))









    



Password:········

Select data on the first hospital stay



In [4]:

    
# Run query and assign the results to a Pandas DataFrame
# Get first admission for each patient
query = \
"""
WITH admit AS (
    SELECT p.gender, 
        ROUND( (CAST(EXTRACT(epoch FROM a.dischtime - a.admittime)/(60*60*24) AS numeric)), 4) AS los_hospital,
        ROUND( (CAST(EXTRACT(epoch FROM a.admittime - p.dob)/(60*60*24*365.242) AS numeric)), 4) AS age, 
        DENSE_RANK() OVER (PARTITION BY a.subject_id ORDER BY a.admittime) AS admission_seq,
        a.ethnicity, a.admission_type, a.insurance, a.religion, a.marital_status, a.hospital_expire_flag
    FROM patients p
    LEFT JOIN admissions a
    ON p.subject_id = a.subject_id)
SELECT *
FROM admit
WHERE admission_seq = 1;
"""

data = pd.read_sql_query(query,con)

Display the first few rows of the data



In [ ]:

    
data.head()

Create Table 1



In [6]:

    
# Columns to include in the summary table
columns = ['gender', 'los_hospital', 'age', 'ethnicity','admission_type', 'insurance', 
           'religion', 'marital_status','hospital_expire_flag']

# List of categorical variables
categorical = ['gender', 'ethnicity', 'insurance', 'religion', 'marital_status',
           'hospital_expire_flag']

# Group the data
groupby = 'admission_type'

# Display the top n number of categorical variables
limit = 3

# Compute p values
pval = False

# Display a count of null values
isnull = False

t = TableOne(data, columns=columns, categorical=categorical, 
         groupby=groupby, limit=limit, pval=pval, isnull=isnull)

t.tableone









    Out[6]:







  
    
      
      
      Grouped by admission_type
    
    
      
      
      ELECTIVE
      EMERGENCY
      URGENT
    
    
      variable
      level
      
      
      
    
  
  
    
      n
      
      8
      90
      2
    
    
      age
      
      74.23 (11.05)
      90.01 (68.12)
      75.66 (4.29)
    
    
      ethnicity
      WHITE
      8 (100.0)
      65 (72.22)
      1 (50.0)
    
    
      UNKNOWN/NOT SPECIFIED
      
      9 (10.0)
      1 (50.0)
    
    
      BLACK/AFRICAN AMERICAN
      
      6 (6.67)
      
    
    
      gender
      F
      5 (62.5)
      48 (53.33)
      2 (100.0)
    
    
      M
      3 (37.5)
      42 (46.67)
      
    
    
      hospital_expire_flag
      0
      8 (100.0)
      58 (64.44)
      1 (50.0)
    
    
      1
      
      32 (35.56)
      1 (50.0)
    
    
      insurance
      Medicare
      5 (62.5)
      70 (77.78)
      1 (50.0)
    
    
      Private
      3 (37.5)
      15 (16.67)
      1 (50.0)
    
    
      Medicaid
      
      4 (4.44)
      
    
    
      los_hospital
      
      11.67 (11.79)
      9.86 (14.43)
      6.26 (0.81)
    
    
      marital_status
      MARRIED
      4 (50.0)
      36 (48.0)
      1 (50.0)
    
    
      SINGLE
      2 (25.0)
      18 (24.0)
      
    
    
      WIDOWED
      1 (12.5)
      13 (17.33)
      
    
    
      religion
      CATHOLIC
      6 (75.0)
      33 (37.08)
      
    
    
      UNOBTAINABLE
      
      16 (17.98)
      
    
    
      NOT SPECIFIED
      1 (12.5)
      14 (15.73)



In [ ]:

		Grouped by admission_type
		ELECTIVE	EMERGENCY	URGENT
variable	level
n		8	90	2
age		74.23 (11.05)	90.01 (68.12)	75.66 (4.29)
ethnicity	WHITE	8 (100.0)	65 (72.22)	1 (50.0)
	UNKNOWN/NOT SPECIFIED		9 (10.0)	1 (50.0)
	BLACK/AFRICAN AMERICAN		6 (6.67)
gender	F	5 (62.5)	48 (53.33)	2 (100.0)
gender	M	3 (37.5)	42 (46.67)
hospital_expire_flag	0	8 (100.0)	58 (64.44)	1 (50.0)
hospital_expire_flag	1		32 (35.56)	1 (50.0)
insurance	Medicare	5 (62.5)	70 (77.78)	1 (50.0)
	Private	3 (37.5)	15 (16.67)	1 (50.0)
	Medicaid		4 (4.44)
los_hospital		11.67 (11.79)	9.86 (14.43)	6.26 (0.81)
marital_status	MARRIED	4 (50.0)	36 (48.0)	1 (50.0)
	SINGLE	2 (25.0)	18 (24.0)
	WIDOWED	1 (12.5)	13 (17.33)
religion	CATHOLIC	6 (75.0)	33 (37.08)
	UNOBTAINABLE		16 (17.98)
	NOT SPECIFIED	1 (12.5)	14 (15.73)