Correlation of DCO2 and weight-corrected DCO2 with PCO2

This file contains the code used for data processing, statistical analysis and visualization described in the following paper: "Weight-correction of carbon dioxide diffusion coefficient (DCO2) reduces its inter-individual variability and improves its correlation with blood carbon dioxide levels in neonates receiving high-frequency oscillatory ventilation." Pediatric Pulmonology, 2017;1–7. https://doi.org/10.1002/ppul.23759

Authors: Gusztav Belteki MD, PhD, FRCPCH, Benjamin Lin BA, Colin Morley MD, FRCPCH

Contact: gbelteki@aol.com

The outputs (numbers, tables, graphs) of this Notebook have been suppresses to comply with copyrights. The corresponding data and graphs can be found in the paper.

Import the required modules and libraries and set options


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import operator
import warnings

from pandas import Series, DataFrame
from scipy.stats.stats import pearsonr
from scipy.stats import ttest_rel
from datetime import datetime, timedelta
from pprint import pprint

%matplotlib inline

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 50)
warnings.simplefilter('ignore')

List and set the working directory and the directory to write out data


In [2]:
# Directory to read the data from
%cd /Users/guszti/ventilation_data
cwd = os.getcwd()


/Users/guszti/ventilation_data

In [3]:
# Directory to write the data to
dir_write = '/Users/guszti/ventilation_data/Analyses/DCO2/revision'

List of recordings


In [4]:
# Part or all of these recordings is HFOV
recordings = [ 'DG005_1', 'DG005_2', 'DG005_3', 'DG006_1', 'DG009',  'DG016', 'DG017', 'DG018_1',  
              'DG020', 'DG022', 'DG025', 'DG027', 'DG032_2', 'DG038_1', 'DG038_2', 'DG040_1', 'DG040_2', 'DG046_1']

Initialize directories, analysis settings, and visualization markers


In [5]:
clinical_details = {}
current_weights = {}
blood_gases = {}
pCO2s = {}
sedation = {}
vent_modes = {}
vent_modes_selected = {} # only important mode parameters are kept in this one
vent_settings = {}
vent_settings_selected = {} # only important mode parameters are kept in this one
slow_measurements = {} # 'slow measurements' are the ventilator parameters obtained with 1/sec frequeency
slow_measurements_hfov = {} # this only contains those parts of 'slow measurements' which do have a DCO2 reading
                            # and therefore are hfov ventilation periods

In [6]:
# Analysing DCO2s over "interval" minutes stopping "offset" minutes prior to the time of the blood gas

interval = 10 # 10 minutes
offset = 2 # 2 minutes

In [7]:
# color and marker set for visualization

colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'magenta', 'maroon', 'navy', 'salmon', 'silver',  'tomato',
          'violet', 'orchid', 'peru', 'wheat', 'lime', 'limegreen']
markers = [".", "+", ",", "x", "o", "D", "d", "8", "s", "p", "*", "h", 0, 4, "<", "3",
           1, 5, ">", "4", 2, 6, "^", "2", 3, 7, "v", "1"]

Import clinical details


In [8]:
for recording in recordings:
    clinical_details[recording] = pd.read_excel('%s/%s' % (cwd, 'data_grabber_patient_data.xlsx'), 
                                                sheetname = recording[:5])

In [9]:
# If there are multiple recordings for the same patient this imports the weight relevant for the given recording
for recording in recordings:
    w = 1 if len(recording) == 5 else int(recording[-1])
    current_weights[recording] = clinical_details[recording].ix[4, w]/1000

Define function to import data


In [10]:
def data_loader(lst):
    
    '''
    lst ->  DataFrame 
    
    - Takes a list of csv files (given as filenames with absolute paths) and import them as a dataframes 
    - Combines the 'Date' and 'Time' columns into a Datetime index while keeping the original columns
    - If there are more files it concatenates them into a single dataframe. 
    - It returns the concatenated data as one DataFrame. 
    
    '''
    
    data  = []
    for i, filename in enumerate(lst):
        data.append(pd.read_csv(lst[i], keep_date_col = 'True', parse_dates = [['Date', 'Time']])) 
    data = pd.concat(data) 
    data.index = data.Date_Time
    return data

Import ventilator modes and settings


In [11]:
def slow_setting_finder(lst):
    '''
    list -> list
    
    Takes a list of filenames and returns those ones that end with '_slow_Setting.csv'
    '''
    return [n for n in lst if n.endswith('_slow_Setting.csv')]

In [12]:
def slow_text_finder(lst):
    '''
    list -> list
    
    Takes a list of filenames and returns those ones that end with '_slow_Text.csv'
    '''
    return [n for n in lst if n.endswith('_slow_Text.csv')]

In [13]:
for recording in recordings:
    flist = os.listdir('%s/%s' % (cwd, recording))
    files = slow_setting_finder(flist)
    # print('Loading recording %s' % recording)
    # print(files)
    fnames = ['%s/%s/%s' % (cwd, recording, filename) for filename in files]
    vent_settings[recording] =  data_loader(fnames)

In [14]:
# remove less important ventilator settings to simplify the table
for recording in recordings:
    a = vent_settings[recording][vent_settings[recording].Id != 'FiO2']
    a = a[a.Id != 'Ø tube']
    a = a[a.Id != 'Tapn']
    a = a[a.Id != 'ΔPsupp']
    a = a[a.Id != 'Tube Æ']
    a = a[a.Id != 'RRsigh']
    a = a[a.Id != 'Psigh']
    a = a[a.Id != 'Slopesigh']
    a = a[a.Unit != 'L']
    a = a[a.Id != 'I (I:E)']
    a = a[a.Id != 'E (I:E)']
    a = a[a.Id != 'MVlow delay']
    a = a[a.Id != 'MVhigh delay']
    a.drop_duplicates(["Rel.Time [s]", "Name"], inplace = True)
    vent_settings_selected[recording] = a

In [15]:
for recording in recordings:
    flist = os.listdir('%s/%s' % (cwd, recording))
    files = slow_text_finder(flist)
    # print('Loading recording %s' % recording)
    # print(files)
    fnames = ['%s/%s/%s' % (cwd, recording, filename) for filename in files]
    vent_modes[recording] =  data_loader(fnames)

In [16]:
# remove less important ventilator mode settings to simplify the table
for recording in recordings:
    a = vent_modes[recording][vent_modes[recording].Id != 'Device is in neonatal mode']
    a = a[a.Id != 'Device is in neonatal mode']
    a = a[a.Id != 'Selected CO2 unit is mmHg']
    a = a[a.Id != "Selected language (variable text string transmitted with this code, see 'MEDIBUS.X, Rules and Standards for Implementation' for further information)"]
    a = a[a.Id != 'Device configured for intubated patient ventilation']
    a = a[a.Id != 'Active humid heated']
    a = a[a.Id != 'Audio pause active']
    a = a[a.Id != 'Active humid unheated']
    a = a[a.Id != 'Suction maneuver active']
    a.drop_duplicates(["Rel.Time [s]", "Id"], inplace = True)
    vent_modes_selected[recording] = a

Import DCO2 data


In [ ]:
def slow_measurement_finder(lst):
    '''
    list -> list
    
    Takes a list of filenames and returns those ones that end with 'Measurement.csv''
    '''
    return [n for n in lst if n.endswith('_slow_Measurement.csv')]

In [ ]:
for recording in recordings:
    flist = os.listdir('%s/%s' % (cwd, recording))
    files = slow_measurement_finder(flist)
    # print('Loading recording %s' % recording)
    # print(files)
    fnames = ['%s/%s/%s' % (cwd, recording, filename) for filename in files]
    slow_measurements[recording] =  data_loader(fnames)

Clean up DCO2 data


In [ ]:
# resampling to adjust the time stamps to full seconds
for recording in recordings:
    slow_measurements[recording] = slow_measurements[recording].resample('1S').mean()

In [ ]:
# Remove rows which have no DCO2 data - this keeps HFOV periods only
for recording in recordings:
    try:
        slow_measurements_hfov[recording] = slow_measurements[recording].dropna(subset = ['5001|DCO2 [10*mL^2/s]'])
    except KeyError:
        print('%s cannot be converted' % recording )

In [ ]:
for recording in recordings:
    
    # Create a column in the DataFrame with the DCO2 data. The original values need to be multiplied by 10 
    # as in the the downloaded data they are expressed as 1/10th of the DCO2 readings (see original column labels)
    slow_measurements_hfov[recording]['DCO2'] = \
        DataFrame(slow_measurements_hfov[recording]['5001|DCO2 [10*mL^2/s]'] * 10)
    
    # this normalizes the DCO2 to the body weight. DCO2 = f x VT x VT. If VT is expressed as mL/kg then DCO2_corr
    # should be 1/sec x mL/kg x mL/kg = mL^2 / kg^2 / sec     
    slow_measurements_hfov[recording]['DCO2_corr'] = \
        DataFrame(slow_measurements_hfov[recording]['5001|DCO2 [10*mL^2/s]'] * 10 / (current_weights[recording] ** 2))
        
    # This column normalizes the VThf to body weight
    slow_measurements_hfov[recording]['VThf_kg'] = \
        DataFrame(slow_measurements_hfov[recording]['5001|VThf [mL]']  / current_weights[recording]) 
        
    # This column normalizs the VThf to body weight squares
    slow_measurements_hfov[recording]['VThf_kg_2'] = slow_measurements_hfov[recording]['VThf_kg']**2

Write out and import processed HFOV data to binary "pickle" file


In [ ]:
for recording in recordings:
    print('%s is being pickled' % recording)
    slow_measurements_hfov[recording].to_pickle('%s/%s_slow_measurements_hfov.p' % (dir_write, recording))

In [ ]:
# importing from pickles
for recording in recordings:
    slow_measurements_hfov[recording] = pd.read_pickle('%s/%s_slow_measurements_hfov.p' % (dir_write, recording))

Calculate HFOV periods


In [ ]:
HFOV_periods = {}

for recording in recordings:
    start = slow_measurements_hfov[recording].index[0]
    end = slow_measurements_hfov[recording].index[-1]
    HFOV_periods[recording] = [start, end]

In [ ]:
# How long are the HFOV periods (in seconds) ?
hfov_durations = {}
for recording in recordings:
    hfov_durations[recording] = HFOV_periods[recording][1] - HFOV_periods[recording][0]

Calculate descriptive statistics on DCO2 data over the whole duration of the recordings and write them to file


In [ ]:
def stats_calculator(obj, col):
    '''
    in: obj (Series or DataFrame), col (str)
    out: tuple of size 8
    
    Calculates 8 different descriptive statistics for the data in the 'col' column of a series or dataframe
    '''
    
    a = obj[col]
    return (a.size, a.mean(), a.std(), a.median(), a.mad(), 
            a.min(), a.quantile(0.25), a.quantile(0.75), a.max())

In [ ]:
# Descriptive statistics on uncorrected DCO2 data
DCO2_stats_all = []

for recording in recordings:
    stats = (stats_calculator(slow_measurements_hfov[recording], 'DCO2'))
    frame = DataFrame([stats], columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 
                                             'min', '25pc', '75pc', 'max', ], index = [recording])
    DCO2_stats_all.append(frame)
    
DCO2_stats_all = pd.concat(DCO2_stats_all)
DCO2_stats_all['hours'] = DCO2_stats_all['number_of_seconds'] / 3600

In [ ]:
# Descriptive statistics on weight-squared-corrected DCO2 data
DCO2_corr_stats_all = []

for recording in recordings:
    stats = (stats_calculator(slow_measurements_hfov[recording], 'DCO2_corr'))
    frame = DataFrame([stats], columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 
                                             'min', '25pc', '75pc', 'max', ], index = [recording])
    DCO2_corr_stats_all.append(frame)
    
DCO2_corr_stats_all = pd.concat(DCO2_corr_stats_all)
writer = pd.ExcelWriter('%s/%s' % (dir_write, 'DCO2_stats.xlsx')) DCO2_stats_all.to_excel(writer,'DCO2') DCO2_corr_stats_all.to_excel(writer,'DCO2_corr') writer.save()

Import blood gases


In [ ]:
for recording in recordings:
    blood_gases[recording] = pd.read_excel('%s/%s' % (cwd, 'data_grabber_gases.xlsx'), 
                                            sheetname = recording[:5], header = None)
    blood_gases[recording] = DataFrame(blood_gases[recording].T)
    blood_gases[recording].columns = blood_gases[recording].ix[0]
    blood_gases[recording] = blood_gases[recording][1:]
    blood_gases[recording].index = [blood_gases[recording]['Date:'], blood_gases[recording]['Time:']]

In [ ]:
pCO2s = {}
for recording in recordings:
    pCO2s[recording] = blood_gases[recording][['pCO2, POC', 'Blood specimen type, POC']]

Change the index of PCO2s into single index format


In [ ]:
# Change the index of pCO2s into single index format

for recording in recordings:
    # print('processing CO2 data for %s' % recording)
    time_list_all = []
    for i in range(len(pCO2s[recording])):
        day = str(pCO2s[recording].index[i][0])[:10]
        time = str(pCO2s[recording].index[i][1])
        date_time = day + ' ' + time
        time_list_all.append(date_time)
                   
    pCO2s[recording].index = time_list_all

In [ ]:
# Convert the indices of the pCO2s DataFrames to datetime index

for recording in recordings:
    try:
        pCO2s[recording].index = pd.to_datetime(pCO2s[recording].index)
    except:
        print('%s cannot be converted' % recording)

Create a dictionary containing the time of blood gases taken during HFOV recording


In [ ]:
gas_list = {}

In [ ]:
gas_list['DG005_1'] = ['2015-10-13 15:30:00', '2015-10-13 17:38:00', '2015-10-13 21:33:00', '2015-10-13 23:05:00',
 '2015-10-14 01:29:00', '2015-10-14 03:56:00', '2015-10-14 07:02:00', '2015-10-14 11:12:00', '2015-10-14 13:10:00',
 '2015-10-14 14:07:00', '2015-10-14 14:31:00', '2015-10-14 16:32:00', '2015-10-14 19:08:00', '2015-10-14 22:42:00',
 '2015-10-15 00:01:00', '2015-10-15 02:44:00', '2015-10-15 03:57:00', '2015-10-15 05:21:00', '2015-10-15 07:45:00',
 '2015-10-15 11:56:00' ]

In [ ]:
gas_list['DG005_2'] = ['2015-10-24 12:33:00', '2015-10-24 13:47:00', ]

In [ ]:
gas_list['DG005_3'] = ['2015-11-04 04:09:00', '2015-11-04 07:24:00', ]

In [ ]:
gas_list['DG006_1'] = [ '2015-10-15 17:45:00', '2015-10-15 20:12:00', '2015-10-15 21:12:00', '2015-10-15 22:30:00',
                       '2015-10-16 01:13:00', '2015-10-16 03:28:00', '2015-10-16 05:07:00', '2015-10-16 07:17:00',
                       '2015-10-16 10:59:00', '2015-10-16 15:34:00']

In [ ]:
gas_list['DG009'] = [ '2015-10-26 17:54:00', '2015-10-26 20:08:00', '2015-10-26 21:33:00', '2015-10-27 01:08:00', 
                     '2015-10-27 06:34:00', '2015-10-27 11:56:00',  '2015-10-27 13:47:00', '2015-10-27 21:06:00',
                     '2015-10-28 02:49:00', '2015-10-28 07:04:00', '2015-10-28 09:44:00']

In [ ]:
gas_list['DG016'] = [ '2015-12-07 14:35:00', '2015-12-07 16:51:00']

In [ ]:
gas_list['DG017'] = ['2015-12-08 12:58:00', '2015-12-08 16:44:00', '2015-12-08 19:33:00',
       '2015-12-08 22:51:00', '2015-12-08 23:27:00', '2015-12-08 23:34:00',
       '2015-12-09 02:06:00', '2015-12-09 03:48:00', '2015-12-09 05:41:00',
       '2015-12-09 07:49:00', '2015-12-09 07:56:00', '2015-12-09 09:12:00',
       '2015-12-09 09:19:00', '2015-12-09 12:27:00', '2015-12-09 16:37:00',
       '2015-12-09 17:39:00', '2015-12-09 20:50:00', '2015-12-10 01:04:00',
       '2015-12-10 03:35:00', '2015-12-10 03:37:00', '2015-12-10 04:33:00',
       '2015-12-10 10:41:00', '2015-12-10 10:48:00', '2015-12-10 12:29:00',
       '2015-12-10 14:20:00', '2015-12-10 17:02:00', '2015-12-10 22:02:00',
       '2015-12-11 03:04:00', '2015-12-11 03:12:00', '2015-12-11 06:16:00',
       '2015-12-11 06:25:00', '2015-12-11 06:59:00', '2015-12-11 10:21:00',
       '2015-12-11 14:16:00']

In [ ]:
gas_list['DG018_1'] = ['2015-12-13 01:27:00',
       '2015-12-13 03:43:00', '2015-12-13 05:20:00', '2015-12-13 07:05:00',
       '2015-12-13 08:46:00', '2015-12-13 10:55:00', '2015-12-13 11:40:00',
       '2015-12-13 12:50:00', '2015-12-13 14:50:00', '2015-12-13 16:50:00',
       '2015-12-13 19:40:00', '2015-12-13 22:22:00', '2015-12-14 00:19:00',
       '2015-12-14 01:33:00', '2015-12-14 04:37:00', '2015-12-14 06:55:00',
       '2015-12-14 09:59:00', '2015-12-14 11:23:00', '2015-12-14 13:07:00']

In [ ]:
gas_list['DG020'] = [ '2015-12-29 11:52:00', '2015-12-29 15:36:00', '2015-12-29 19:13:00', '2015-12-29 22:23:00',
       '2015-12-30 02:10:00', '2015-12-30 03:54:00', '2015-12-30 05:43:00', '2015-12-30 07:46:00',
        '2015-12-30 09:06:00', '2015-12-30 13:07:00']

In [ ]:
gas_list['DG022'] = ['2016-01-02 12:43:00', '2016-01-02 15:58:00', '2016-01-02 19:46:00',
       '2016-01-02 21:41:00', '2016-01-02 22:43:00', '2016-01-03 00:01:00',
       '2016-01-03 01:15:00', '2016-01-03 02:14:00', '2016-01-03 03:05:00',
       '2016-01-03 03:59:00', '2016-01-03 05:34:00', '2016-01-03 07:37:00',
       '2016-01-03 13:28:00', '2016-01-03 18:54:00', '2016-01-03 22:44:00',
       '2016-01-05 02:50:00', '2016-01-05 07:09:00', '2016-01-05 14:08:00',
       '2016-01-05 23:31:00']

In [ ]:
gas_list['DG025'] = [ '2016-01-21 23:46:00',
       '2016-01-22 03:31:00', '2016-01-22 06:27:00', '2016-01-22 09:06:00',
       '2016-01-22 11:09:00', '2016-01-22 12:35:00', '2016-01-22 14:14:00',
       '2016-01-22 18:03:00', '2016-01-22 22:12:00', '2016-01-23 01:24:00',
       '2016-01-23 04:24:00', '2016-01-23 08:46:00', '2016-01-23 10:17:00',
       '2016-01-23 14:47:00', '2016-01-23 19:22:00', '2016-01-23 21:41:00',
       '2016-01-24 00:45:00', '2016-01-24 07:50:00', '2016-01-24 10:59:00',]

In [ ]:
gas_list['DG027'] = ['2016-01-29 10:48:00', '2016-01-29 11:56:00', '2016-01-29 14:34:00']

In [ ]:
gas_list['DG032_2'] = ['2016-03-22 13:22:00',
       '2016-03-22 21:07:00', '2016-03-23 02:17:00', '2016-03-23 11:19:00',
       '2016-03-23 18:46:00', '2016-03-23 21:33:00', '2016-03-24 00:15:00',
       '2016-03-24 02:13:00', '2016-03-24 06:15:00', '2016-03-24 09:08:00',
       '2016-03-24 13:20:00', '2016-03-24 18:19:00', '2016-03-25 01:41:00',
       '2016-03-25 03:29:00', '2016-03-25 10:46:00', '2016-03-25 17:44:00',
       '2016-03-26 02:54:00', '2016-03-26 03:34:00', '2016-03-26 04:15:00',
       '2016-03-26 05:46:00', '2016-03-26 09:02:00']

In [ ]:
gas_list['DG038_1'] =  ['2016-05-06 10:34:00', '2016-05-06 14:25:00',
       '2016-05-06 17:58:00', '2016-05-06 19:34:00', '2016-05-06 21:22:00',
       '2016-05-06 23:36:00', '2016-05-07 03:11:00', '2016-05-07 05:45:00',
       '2016-05-07 10:13:00', '2016-05-07 12:45:00', '2016-05-07 16:33:00',
       '2016-05-07 19:01:00', '2016-05-08 00:19:00', '2016-05-08 03:54:00',
       '2016-05-08 09:13:00', '2016-05-08 13:41:00', '2016-05-08 19:30:00',
       '2016-05-08 22:35:00', '2016-05-09 03:53:00', '2016-05-09 10:19:00',
       '2016-05-09 11:59:00', '2016-05-09 18:45:00', '2016-05-09 21:53:00',
       '2016-05-10 03:06:00', '2016-05-10 07:38:00', '2016-05-10 12:08:00',
       '2016-05-10 13:51:00', '2016-05-10 13:59:00', '2016-05-10 17:35:00',
       '2016-05-10 23:06:00', '2016-05-11 05:13:00', '2016-05-11 10:03:00',]

In [ ]:
gas_list['DG038_2'] =['2016-05-17 18:01:00',
       '2016-05-18 01:14:00', '2016-05-18 07:04:00', '2016-05-18 11:49:00',
       '2016-05-18 17:54:00', '2016-05-19 03:08:00', '2016-05-19 12:57:00',
       '2016-05-19 17:57:00', '2016-05-20 03:34:00', '2016-05-20 09:09:00',
       '2016-05-20 17:20:00', '2016-05-21 00:46:00', '2016-05-21 01:57:00',
       '2016-05-21 05:34:00', '2016-05-21 09:35:00', '2016-05-21 14:07:00',
       '2016-05-21 18:18:00', '2016-05-21 21:35:00', '2016-05-22 02:25:00',
       '2016-05-22 09:43:00']

In [ ]:
gas_list['DG040_1'] = ['2016-06-09 16:24:00',
       '2016-06-09 16:57:00', '2016-06-09 19:15:00', '2016-06-09 21:51:00',
       '2016-06-09 23:12:00', '2016-06-09 23:21:00', '2016-06-09 23:32:00',
       '2016-06-10 01:17:00', '2016-06-10 03:28:00', '2016-06-10 07:36:00',
       '2016-06-10 11:34:00', '2016-06-10 11:42:00', '2016-06-10 18:03:00',
       '2016-06-10 19:27:00', '2016-06-10 19:41:00', '2016-06-10 22:04:00',
       '2016-06-11 02:53:00', '2016-06-11 11:17:00', '2016-06-11 15:45:00',
       '2016-06-11 16:47:00']

In [ ]:
gas_list['DG040_2'] = ['2016-06-21 14:17:00', '2016-06-21 18:55:00',
       '2016-06-21 23:24:00', '2016-06-22 07:03:00', '2016-06-22 09:55:00',
       '2016-06-22 13:36:00', '2016-06-22 13:47:00', '2016-06-22 17:59:00',
       '2016-06-22 22:16:00', '2016-06-23 00:36:00', '2016-06-23 05:34:00',
       '2016-06-23 09:41:00', '2016-06-23 14:09:00', '2016-06-23 19:05:00',
       '2016-06-23 23:00:00', '2016-06-24 04:57:00', '2016-06-24 09:25:00',
       '2016-06-24 11:27:00', '2016-06-24 13:33:00', '2016-06-24 19:39:00',
       '2016-06-24 22:51:00', '2016-06-24 01:17:00', '2016-06-24 07:24:00']

In [ ]:
gas_list['DG046_1'] = ['2016-07-10 20:40:00',
       '2016-07-10 21:45:00', '2016-07-10 23:12:00', '2016-07-11 00:26:00',
       '2016-07-11 03:11:00', '2016-07-11 05:43:00', '2016-07-11 07:17:00',
       '2016-07-11 09:04:00', '2016-07-11 16:58:00', '2016-07-11 18:18:00',
       '2016-07-11 21:44:00', '2016-07-12 01:08:00', '2016-07-12 06:55:00',
       '2016-07-12 10:02:00', '2016-07-12 15:09:00']

In [ ]:
# Only keep those pCO2 data when there is HFOV recording
for recording in recordings:
    start = gas_list[recording][0]
    end = gas_list[recording][-1]
    pCO2s[recording] = pCO2s[recording].ix[start:end]

Calculate DCO2s and weight-corrected DCO2s for 10 minute periods before the blood gases, combine them with the pCO2 readings, collect them in a dictionary of dictionaries and write it to multisheet Excel files.


In [ ]:
def DCO2_calculator(recording, column, time, interval, offset, ):
    
    '''
    in:
    -recording: recording name (string)
    -column: column to calculate the statistics on, e.g. DCO2 or DCO2_corr
    -time: time of blood gas (string)
    -interval: time in minutes (int) 
    -offset: time in minutes (int) 
    
    out:
    tuple of 9 values:
    - number of DCO2 measurements over the period
    - mean
    - st deviation
    - median
    - mad (mean absolute deviation) 
    - minimum 
    - 25th centile
    - 75th centile
    - maximum
    
    Calculates statistics about DCO2 values before a blood gas. The interval in 'interval' long (in minutes)
    and it ends 'offset' minutes before the blood gas 
    
    '''
    
    #time is given in nanoseconds: 1 min = 60000000000 nanoseconds
    end = pd.to_datetime(time) - pd.to_timedelta(offset * 60000000000)
    
    start = end - pd.to_timedelta(interval * 60000000000)
    
    # 1 sec (1000000000 nsec) needs to be taken away otherwise the period will be 901 seconds 
    # as it includes the last second
    data = slow_measurements_hfov[recording][start : end - pd.to_timedelta(1000000000)]
    
    stats = stats_calculator(data, column)
    
    return stats

In [ ]:
# Create a dictionary of DCO2 calculations
DCO2_stats_gases = {}
DCO2_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]

for recording in recordings:
    lst = []
    for gas in gas_list[recording]:
        lst.append(DCO2_calculator(recording,'DCO2', gas, interval, offset))
    stats = DataFrame(lst, columns = columns, index = gas_list[recording])
    DCO2_stats_gases[interval, offset][recording] = stats
    
    # add pCO2 data
    DCO2_stats_gases[interval, offset][recording] = \
        DCO2_stats_gases[interval, offset][recording].join(pCO2s[recording])
    
    # Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
    # Some rows do not have DCO2 values because the baby received conventional ventilation
    # during this period (flanked by HFOV periods)
    # Some rows do not have pCO2 data because the blood gas was insufficient
    DCO2_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
                                                         , inplace = True)

In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_stats_gases', interval, offset) )
for recording in recordings:
    DCO2_stats_gases[interval, offset][recording].to_excel(writer, recording)
writer.save()

In [ ]:
# Create a dictionary of weight-squared corrected DCO2 calculations

DCO2_corr_stats_gases = {}
DCO2_corr_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]

for recording in recordings:
    lst = []
    for gas in gas_list[recording]:
        lst.append(DCO2_calculator(recording,'DCO2_corr', gas, interval, offset))
    stats = DataFrame(lst, columns = columns, index = gas_list[recording])
    DCO2_corr_stats_gases[interval, offset][recording] = stats
    
    # add pCO2 data
    DCO2_corr_stats_gases[interval, offset][recording] = \
        DCO2_corr_stats_gases[interval, offset][recording].join(pCO2s[recording])
    
    
    # Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
    # Some rows do not have DCO2 values because the baby received conventional ventilation
    # during this period (flanked by HFOV periods)
    # Some rows do not have pCO2 data because the blood gas was insufficient
    DCO2_corr_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
                                                         , inplace = True)

In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_corr_stats_gases', interval, offset) )
for recording in recordings:
    DCO2_corr_stats_gases[interval, offset][recording].to_excel(writer, recording)
writer.save()

Combine all DCO2 and DCO2_corr data into single dataframes (one for each) and write them to excel files

standard DCO2


In [ ]:
DCO2_stats_gases_all = {}

lst = []
for recording in recordings:
    DCO2_stats_gases[interval, offset][recording]['recording'] = recording
    lst.append(DCO2_stats_gases[interval, offset][recording])
DCO2_stats_gases_all[interval, offset] = pd.concat(lst)

In [ ]:
DCO2_stats_gases_all[interval, offset];

weight_corrected DCO2


In [ ]:
DCO2_corr_stats_gases_all = {}

lst = []
for recording in recordings:
    DCO2_corr_stats_gases[interval, offset][recording]['recording'] = recording
    lst.append(DCO2_corr_stats_gases[interval, offset][recording])
DCO2_corr_stats_gases_all[interval, offset] = pd.concat(lst)

In [ ]:
DCO2_corr_stats_gases_all[interval, offset];

Write cumulative data to excel files and binary pickle files


In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_stats_all_gases', interval, offset) )
DCO2_stats_gases_all[interval, offset].to_excel(writer, 'DCO2')
writer.save()

In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_corr_stats_all_gases', interval, offset) )
DCO2_corr_stats_gases_all[interval, offset].to_excel(writer, 'DCO2_corr')
writer.save()

In [ ]:
DCO2_stats_gases_all[interval, offset].to_pickle('%s/%s_%d_%d.p'
                                                 % (dir_write, 'DCO2_stats_all_gases', interval, offset))

Calculate weight-corrected high frequency tidal volumes (VThfs) before the blood gases, combine them with the pCO2 readings, collect them in a dictionary of dictionaries


In [ ]:
# Create a dictionary of DCO2 calculations
VThf_stats_gases = {}
VThf_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]

for recording in recordings:
    lst = []
    for gas in gas_list[recording]:
        lst.append(DCO2_calculator(recording,'VThf_kg', gas, interval, offset))
    stats = DataFrame(lst, columns = columns, index = gas_list[recording])
    VThf_stats_gases[interval, offset][recording] = stats
    
    # add pCO2 data
    VThf_stats_gases[interval, offset][recording] = \
        VThf_stats_gases[interval, offset][recording].join(pCO2s[recording])
    
    # Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
    # Some rows do not have DCO2 values because the baby received conventional ventilation
    # during this period (flanked by HFOV periods)
    # Some rows do not have pCO2 data because the blood gas was insufficient
    VThf_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
                                                         , inplace = True)

Combine all VThfs data into single dataframe


In [ ]:
VThf_stats_gases_all = {}
lst = []
for recording in recordings:
    VThf_stats_gases[interval, offset][recording]['recording'] = recording
    lst.append(VThf_stats_gases[interval, offset][recording])
VThf_stats_gases_all[interval, offset] = pd.concat(lst)

Calculate weight-square-corrected VThfs before the blood gases, combine them with the pCO2 readings, collect them in a dictionary of dictionaries


In [ ]:
# Create a dictionary of DCO2 calculations
VThf_sq_stats_gases = {}
VThf_sq_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]

for recording in recordings:
    lst = []
    for gas in gas_list[recording]:
        lst.append(DCO2_calculator(recording,'VThf_kg_2', gas, interval, offset))
    stats = DataFrame(lst, columns = columns, index = gas_list[recording])
    VThf_sq_stats_gases[interval, offset][recording] = stats
    
    # add pCO2 data
    VThf_sq_stats_gases[interval, offset][recording] = \
        VThf_sq_stats_gases[interval, offset][recording].join(pCO2s[recording])
    
    # Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
    # Some rows do not have DCO2 values because the baby received conventional ventilation
    # during this period (flanked by HFOV periods)
    # Some rows do not have pCO2 data because the blood gas was insufficient
    VThf_sq_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
                                                         , inplace = True)

Combine all VThf-squared data into single dataframe


In [ ]:
VThf_sq_stats_gases_all = {}
lst = []
for recording in recordings:
    VThf_sq_stats_gases[interval, offset][recording]['recording'] = recording
    lst.append(VThf_sq_stats_gases[interval, offset][recording])
VThf_sq_stats_gases_all[interval, offset] = pd.concat(lst)

Create DataFrames with leak data together with the mean DCO2, DCO2_corr and with PCO2 data


In [ ]:
def parameter_calculator(recording, parameter, time, interval, offset, ):
    
    '''
    in:
    -recording: recording name (string)
    -parameter: parameter to calculate the mean on, e.g. DCO2 or DCO2_corr
    -time: time of blood gas (string)
    -interval: time in minutes (int) 
    -offset: time in minutes (int) 
    
    out:
    -
    
    Calculates the mean value of the parameter before a blood gas. 
    The interval in 'interval' long (in minutes) and it ends 'offset' minutes before the blood gas
    
    '''
    
    # time is given in nanoseconds: 1 min = 60000000000 nanoseconds
    end = pd.to_datetime(time) - pd.to_timedelta(offset * 60000000000)
    
    start = end - pd.to_timedelta(interval * 60000000000)
    
    # 1 sec (1000000000 nsec) needs to be taken away otherwise the period will be 901 seconds 
    # as it includes the last second
    data = slow_measurements_hfov[recording][start : end - pd.to_timedelta(1000000000)]
    
    return data[parameter].mean()

In [ ]:
# Create a dictionary of Vthf, DCO2 calculations together with the leak data
DCO2_stats_gases_leak = {}
DCO2_stats_gases_leak[interval, offset] = {}

for recording in recordings:
    columns = ['DCO2', 'DCO2_corr', '5001|% leak [%]', '5001|MVleak [L/min]', '5001|MVi [L/min]', 
               '5001|MVe [L/min]']
    par_dict = {}
    for gas in gas_list[recording]:
        lst = []
        lst.append(parameter_calculator(recording,'DCO2', gas, interval, offset))
        lst.append(parameter_calculator(recording,'DCO2_corr', gas, interval, offset))
        lst.append(parameter_calculator(recording,'5001|% leak [%]', gas, interval, offset))
        lst.append(parameter_calculator(recording,'5001|MVleak [L/min]', gas, interval, offset))
        lst.append(parameter_calculator(recording,'5001|MVi [L/min]', gas, interval, offset))
        lst.append(parameter_calculator(recording,'5001|MVe [L/min]', gas, interval, offset))
        par_dict[gas] = lst
        
    DCO2_stats_gases_leak[interval, offset][recording] = \
        DataFrame(par_dict).T
    DCO2_stats_gases_leak[interval, offset][recording].columns = columns
    
    # add pCO2 data
    DCO2_stats_gases_leak[interval, offset][recording] = \
        DCO2_stats_gases_leak[interval, offset][recording].join(pCO2s[recording])
    
    # Dropping the rows from the DataFrame that do not have  pCO2 values
    # Some rows do not have pCO2 data because the blood gas was insufficient
    DCO2_stats_gases_leak[interval, offset][recording].dropna(axis = 0, how = 'any', subset =  ['pCO2, POC']
                                                         , inplace = True)

Combine all leak data into single dataframes (one for each) and write them to excel files


In [ ]:
DCO2_stats_gases_leak_all = {}

lst = []
for recording in recordings:
    DCO2_stats_gases_leak[interval, offset][recording]['recording'] = recording
    lst.append(DCO2_stats_gases_leak[interval, offset][recording])
    
DCO2_stats_gases_leak_all[interval, offset] = pd.concat(lst)
DCO2_stats_gases_leak_all[interval, offset].columns = \
    ['DCO2', 'DCO2_corr', 'leak%', 'MV_leak', 'MVi', 'MVe', 'pCO2', 'specimen', 'recording']

In [ ]:
# Mean leak more that 5% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 5]);

In [ ]:
# Mean leak more that 10% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 10]);

In [ ]:
# Mean leak more that 25% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 25]);

In [ ]:
# Mean leak more that 50% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 50]);

Define functions to visualise data

Define function to calculate correlation


In [ ]:
def correl(x, y):

    '''
    input: two numeric arrays of the same size

    returns: a tuple of 
    1. Pearson's correlation coefficient: r
    2. low and high 95% confidence intervals or r (two values)
    3. Coefficient of determination: r^2
    4. p-value of correlation

    '''
    
    assert len(x) == len(y)
    
    r, p = pearsonr(x, y)
    f = 0.5*np.log((1+r)/(1-r))
    se = 1/np.sqrt(len(x)-3)
    ucl = f + 1.96 * se
    lcl = f - 1.96 * se

    lcl = (np.exp(2*lcl) - 1) / (np.exp(2*lcl) + 1)
    ucl = (np.exp(2*ucl) - 1) / (np.exp(2*ucl) + 1)

    return r , lcl, ucl , r*r, p

Define functions to visualize individual recordings and to save them to files


In [ ]:
def visualise_DCO2_corr(rec, i, xlimoffset = 20, ylimoffset = 2, textboxoffset_x = 0.55, textboxoffset_y = 0.9):
    
    '''
    input:
    - rec = list of recordings (list)
    - xlimoffset = how much longer is the X-axis than the highest X value (int)
    - ylimoffset = how much longer is the y-axis than the highest y value (int)
    - textboxoffset_x = vertical position of the textbox compared to X-lim, fraction (float)
    - textboxoffset_y = horizontal position of the textbox compared to y-lim, fraction (float)

    Draws a pCO2-DCO2_corr graph using data in recordings 'rec'. Points belonging to different recordings
    have different markers and colors.
    A correlation line is also drawn.
    It also calculates Spearman's correlation coefficient with confidence intervals and p-value. 
    Puts these data on the graph.
    '''
    
    total = DCO2_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    x = sample['mean']
    y = sample['pCO2, POC']
    if len(x) == 0:
        return
    
    ax = fig.add_subplot(len(rec),1,i+1);
    plt.scatter(x , y, color = 'black', marker = markers[0], s = 60)
    plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
    plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize  = 14)
    
    plt.title(r'%s: Weight-corrected DCO$ \rm _2$' % recording , fontsize = 14)
    xlim = ax.get_xlim()[1]
    ylim = ax.get_ylim()[1]
    plt.xlim([0, xlim + xlimoffset])
    plt.ylim([0, ylim + ylimoffset])
    # Polynomial Coefficients
    coeffs = np.polyfit(x, y, deg = 1)
    result = coeffs.tolist()
    
    # Fit a trendline
    l = np.poly1d(coeffs)
    plt.plot(x,l(x),'r--')

    # Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
    r , lcl, ucl , r2, p = correl(x,y)
    p = round(p, 4)

    # print the equation on the graph area 
    
    text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.4f' % (result[0], result[1], r, lcl, ucl, p)

    plt.text(xlim * textboxoffset_x, ylim * textboxoffset_y, text, color = 'black', style='normal', fontsize=15, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
    
    text2 = 'y=%.4fx+(%.4f)   r=%.4f (%.4f , %.4f)   p=%.4f' % (result[0], result[1], r, lcl, ucl, p)
    print(text2)
    
    fig.savefig('%s/%s_DCO2_corr.pdf' % (dir_write, recording), dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'pdf',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

In [ ]:
def visualise_DCO2_corr_art(rec, i, xlimoffset = 20, ylimoffset = 2, textboxoffset_x = 0.55, textboxoffset_y = 0.9):
    
    '''
    input:
    - rec = list of recordings (list)
    - xlimoffset = how much longer is the X-axis than the highest X value (int)
    - ylimoffset = how much longer is the y-axis than the highest y value (int)
    - textboxoffset_x = vertical position of the textbox compared to X-lim, fraction (float)
    - textboxoffset_y = horizontal position of the textbox compared to y-lim, fraction (float)

    Draws a pCO2-DCO2_corr graph using data in recordings 'rec'. Points belonging to different recordings
    have different markers and colors. THIS FUNCTION ONLY CONSIDERS ARTERIAL BLOOD GASES.
    A correlation line is also drawn.
    It also calculates Spearman's correlation coefficient with confidence intervals and p-value. 
    Puts these data on the graph.
    '''
    
    total = DCO2_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    sample_art = sample.ix[sample['Blood specimen type, POC'] == 'Arteri...']
    x = sample_art['mean']
    y = sample_art['pCO2, POC']
    if len(x) == 0:
        return
    
    ax = fig.add_subplot(len(rec),1,i+1);
    plt.scatter(x , y, color = 'black', marker = markers[0], s = 60)
    plt.ylabel("pCO2 (kPa)", fontsize  = 16)
    plt.xlabel("DCO2 (ml^2/kg^2.sec)", fontsize  = 16)
    plt.title("%s: Weight-corrected DCO2 -  arterial pCO2" % recording  , fontsize = 18)
    xlim = ax.get_xlim()[1]
    ylim = ax.get_ylim()[1]
    plt.xlim([0, xlim + xlimoffset])
    plt.ylim([0, ylim + ylimoffset])
    # Polynomial Coefficients
    coeffs = np.polyfit(x, y, deg = 1)
    result = coeffs.tolist()
    
    # Fit a trendline
    l = np.poly1d(coeffs)
    plt.plot(x,l(x),'r--')

    # Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
    r , lcl, ucl , r2, p = correl(x,y)
    p = round(p, 3)

    # print the equation on the graph area 
    text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
    plt.text(xlim * textboxoffset_x, ylim * textboxoffset_y, text, color = 'black', style='normal', fontsize=15, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
    
    fig.savefig('%s/%s_DCO2_corr_art.pdf' % (dir_write, recording), dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'pdf',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

Visualise data from individual recordings where at least ten data points are available

All blood gases

These graphs are shown in Supplementary Figure 3 of the paper.


In [ ]:
# Select only recordings where at least 10 blood gases are available
select_recs = [key for key in pCO2s if len(pCO2s[key]) >= 10]
select_recs = sorted(select_recs)
select_recs;

In [ ]:
recordings = select_recs[0:1]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 6, textboxoffset_x = 0.50, textboxoffset_y = 1.1)
    
plt.close()

In [ ]:
recordings = select_recs[1:2]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 4, textboxoffset_x = 0.66, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs[2:3]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs[3:4]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.5, textboxoffset_y = 1.1)
    
plt.close()

In [ ]:
recordings = select_recs[4:5]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs[5:6]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.5, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs[6:7]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.45, textboxoffset_y = 1)
    
plt.close()

In [ ]:
recordings = select_recs[7:8]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 1.2, textboxoffset_y = 1.3)
    
plt.close()

In [ ]:
recordings = select_recs[8:9]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.1)
    
plt.close()

In [ ]:
recordings = select_recs[9:10]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.1)
    
plt.close()

In [ ]:
recordings = select_recs[10:11]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.6, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs[11:12]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.6, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs[12:13]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.75, textboxoffset_y = 1.2)

plt.close()

In [ ]:
recordings = select_recs[13:]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.1)

plt.close()

Arterial gases only


In [ ]:
# Select only recordings where at least 10 ARTERIAL blood gases are available
select_recs_art = [key for key in pCO2s if 
               len(pCO2s[key].ix[pCO2s[key]['Blood specimen type, POC'] == 'Arteri...']) >= 10]
select_recs_art = sorted(select_recs_art)
select_recs_art;

In [ ]:
recordings = select_recs_art[0:1]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5, 
                            textboxoffset_x = 0.65, textboxoffset_y = 1.3)

plt.close()

In [ ]:
recordings = select_recs_art[1:2]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5, 
                            textboxoffset_x = 0.5, textboxoffset_y = 1.2)
    
plt.close()

In [ ]:
recordings = select_recs_art[2:3]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5, 
                            textboxoffset_x = 0.45, textboxoffset_y = 1)

plt.close()

In [ ]:
recordings = select_recs_art[3:4]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5, 
                            textboxoffset_x = 1.3, textboxoffset_y = 1.25)

plt.close()

In [ ]:
recordings = select_recs_art[4:5]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5, 
                            textboxoffset_x = 0.6, textboxoffset_y = 1.3)

plt.close()

In [ ]:
recordings = select_recs_art[5:6]

fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5, 
                            textboxoffset_x = 0.6, textboxoffset_y = 1.2)
    
plt.close()

Create ROC curve


In [ ]:
val = range(0, 180, 5)
true_pos = []
false_pos = []
true_neg = []
false_neg = []

a = DCO2_stats_gases_leak_all[10,2]

for i in val:
    pos = a[a['DCO2_corr'] >= i]
    neg = a[a['DCO2_corr'] < i]
    tp = len(pos[pos['pCO2'] <= 8])
    fp = len(pos[pos['pCO2'] > 8]) 
    tn = len(neg[neg['pCO2'] > 8])
    fn = len(neg[neg['pCO2'] <= 8])
    true_pos.append(tp)
    false_pos.append(fp)
    true_neg.append(tn)
    false_neg.append(fn)

In [ ]:
DCO2_test = DataFrame({'tp': true_pos, 'fp': false_pos, 'tn': true_neg, 'fn': false_neg}, index = val )

In [ ]:
DCO2_test['sensitivity'] = round(DCO2_test.tp / (DCO2_test.tp + DCO2_test.fn), 3)
DCO2_test['specificity'] = round(DCO2_test.tn / (DCO2_test.tn + DCO2_test.fp), 3)
DCO2_test['pos_pred_value'] = round(DCO2_test.tp / (DCO2_test.tp + DCO2_test.fp), 3)
DCO2_test['neg_pred_value'] = round(DCO2_test.tn / (DCO2_test.tn + DCO2_test.fn), 3)
DCO2_test['Youden'] = round(DCO2_test.sensitivity + DCO2_test.specificity - 1, 3)

In [ ]:
DCO2_test[DCO2_test.Youden == DCO2_test.Youden.max()];

In [ ]:
DCO2_test['1-specificity'] = 1 - DCO2_test['specificity']

In [ ]:
DCO2_test.sort_values('1-specificity', inplace = True)

In [ ]:
DCO2_test;

In [ ]:
DCO2_test['1-specificity'].iloc[30];

In [ ]:
DCO2_test['sensitivity'].iloc[30];

In [ ]:
AUC = 0 # Area under the ROC curve

for i in range(len(DCO2_test['1-specificity'])-2):
    c_high = ((DCO2_test['1-specificity'].iloc[i+1] - DCO2_test['1-specificity'].iloc[i]) *  
        DCO2_test['sensitivity'].iloc[i+1])
    c_low = ((DCO2_test['1-specificity'].iloc[i+1] - DCO2_test['1-specificity'].iloc[i]) *  
        DCO2_test['sensitivity'].iloc[i])
    c = (c_high + c_low) / 2
    AUC += c

Select blood gases with DCO2_corr values over 60 mL2/kg2/sec

All gases


In [ ]:
s = DCO2_corr_stats_gases_all[interval, offset]
s = s[s['mean'] > 60]

In [ ]:
len(s);

In [ ]:
s[s['pCO2, POC'] > 8];

In [ ]:
s['recording'].unique();

In [ ]:
len(s['recording'].unique());

Arterial blood gases


In [ ]:
s = DCO2_corr_stats_gases_all[interval, offset]
s = s.ix[s['Blood specimen type, POC'] == 'Arteri...']
s = s[s['mean'] > 60]

In [ ]:
len(s);

In [ ]:
s[s['pCO2, POC'] > 8];

In [ ]:
s['recording'].unique();

In [ ]:
len(s['recording'].unique());

Generate figures for the manuscript


In [ ]:
recordings = [ 'DG005_1', 'DG005_2', 'DG005_3', 'DG006_1', 'DG009',  'DG016', 'DG017', 'DG018_1',  
              'DG020', 'DG022', 'DG025', 'DG027', 'DG032_2', 'DG038_1', 'DG038_2', 'DG040_1', 'DG040_2', 'DG046_1']

Figures in the main article

Figure 1


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    total = DCO2_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    x = sample['mean']
    y = sample['pCO2, POC']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i] , marker = markers[i], s = 25)
    plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
    plt.xlabel(r'DCO$ \rm _2$ (mL$\rm^2$/sec)', fontsize  = 14)
    plt.tick_params(axis='both', which='major', labelsize=14)
    plt.xlim([0,600])
    plt.title(r'Uncorrected DCO$ \rm _2$ - all gases' , fontsize = 14);


a = DCO2_stats_gases_all[interval, offset]['mean']
b = DCO2_stats_gases_all[interval, offset]['pCO2, POC']

# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
    
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)' % (result[0], result[1], r, lcl, ucl)
plt.text(275, b.max()-2, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});

fig.savefig('%s/Figure_1_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Figure 2


In [ ]:
x = DCO2_stats_gases_all[interval, offset]['mean']
y = DCO2_corr_stats_gases_all[interval, offset]['mean']

fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
ax1 = fig.add_subplot(1, 1, 1);
bp = plt.boxplot([x, y] )

plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['medians'], color='red')
plt.setp(bp['fliers'], color='black', marker='+')
plt.ylabel("Units", fontsize  = 14)
plt.xlabel("", fontsize  = 14)
plt.xticks([1,2], [r'DCO$ \rm _2$ (mL$\rm^2$/sec)', r'DCO$ \rm _2$ (mL$\rm^2$/sec/kg$^2$)'], size = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.title(r'Boxplots of all DCO$ \rm _2$ data' , fontsize = 14);

# calculate p values
t_stats, p = ttest_rel(x, y)
ax1.text(ax1.get_xlim()[1]-1.2, 550, 'p<0.001', color = 'red', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'None', 'alpha':1, 'pad':10}); 

fig.savefig('%s/Figure_2_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Figure 3


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

for i, recording in enumerate(recordings):
    total = DCO2_corr_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    x = sample['mean']
    y = sample['pCO2, POC']
    fig.add_subplot(1,1,1);
    ax = plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    
plt.axvline(x= 50 , linewidth=1, color = 'black')
plt.axhline(y= 8,  linewidth=1, color = 'black')
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$) - all gases', fontsize  = 14)
plt.tick_params(axis='both', which='major', labelsize=14)

plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - all gases' , fontsize = 14)

a = DCO2_corr_stats_gases_all[interval, offset]['mean']
b = DCO2_corr_stats_gases_all[interval, offset]['pCO2, POC']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a), '--', color = 'red', linewidth = 2)

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.47, b.max() * 0.85, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10})

fig.savefig('%s/Figure_3_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Figure 4


In [ ]:
# ROC curve
# A corrected DCO2 > 50 ml/min/kg2 predict a pCO2 < 8 kPa with a senstiivty of 0.390 and a specificity of 0.825 
# (Youden score = 0.215)
x = [0, 1]; y = [0, 1]

fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.add_subplot(1,1,1)

text = r'DCO$ \rm _2$corr = 50'
text2 = 'AUC = %.3f' % AUC

plt.plot(1 - DCO2_test.specificity , DCO2_test.sensitivity, c = 'black')
ax = plt.plot(x, y, c = 'black', linestyle = '--', )
plt.vlines(1 - 0.824561, 1 - 0.824561, 0.390, color='k', linewidth = 3)
plt.text(0.05, 0.60, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});

plt.text(0.6, 0.3, text2, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});

plt.title('ROC curve', size = 14)
plt.ylabel('Sensitivity', fontsize  = 14)
plt.xlabel('1 - Specificity', fontsize  = 14)
plt.grid()

fig.savefig('%s/Figure_4.tiff' % dir_write , dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Figure 5


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)

subset = DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] < 10]
for i, recording in enumerate(recordings):
    total = subset
    sample = total.ix[total['recording'] == recording]
    x = sample['DCO2_corr']
    y = sample['pCO2']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize  = 14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - leak < 10%' , fontsize = 14)
    
a = total['DCO2_corr']
b = total['pCO2']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'r--')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})

fig.savefig('%s/Figure_5_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Supplementary figures

Supplementary Figure 1


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
fig.suptitle('E-Figure 1', size = 14)

for i, recording in enumerate(recordings):
    total = DCO2_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    sample_art = sample.ix[sample['Blood specimen type, POC'] == 'Arteri...']
    x = sample_art['mean']
    y = sample_art['pCO2, POC']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
    plt.xlabel(r'DCO$ \rm _2$ (mL$\rm^2$/sec)', fontsize  = 14)
    plt.tick_params(axis='both', which='major', labelsize=14)
    plt.xlim([0,600])
    plt.title(r'Uncorrected DCO$ \rm _2$ - arterial gases only' , fontsize = 14);

sample2 = total.ix[total['Blood specimen type, POC'] == 'Arteri...']
a = sample2['mean']
b = sample2['pCO2, POC']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.48, b.max()-2, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});

fig.savefig('%s/E_Figure_1_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Supplementary Figure 2


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 2', size = 14)

for i, recording in enumerate(recordings):
    total = DCO2_corr_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    sample_art = sample.ix[sample['Blood specimen type, POC'] == 'Arteri...']
    x = sample_art['mean']
    y = sample_art['pCO2, POC']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)

plt.axvline(x= 60 , linewidth=1, color = 'black')
plt.axhline(y= 8,  linewidth=1, color = 'black')    
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize  = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - arterial gases only' , fontsize = 14)
    

sample2 = total.ix[total['Blood specimen type, POC'] == 'Arteri...']
a = sample2['mean']
b = sample2['pCO2, POC']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a), '--', color = 'red',  linewidth = 2)

# Calculate pearson's correlation coefficient with confidence intervals, coefficient of determination and p value
r , lcl, ucl , r2, p = correl(a,b)


# print the equation on the graph area 
    
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)


plt.text(a.max()*0.48, b.max()*0.83, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10});


fig.savefig('%s/E_figure_2_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

For Supplementary Figure 3 of the paper please see the functions creating the individual graphs above.

Supplementary Figure 4


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 4', size = 14)

subset = DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] < 10]
for i, recording in enumerate(recordings):
    total = subset
    sample = total.ix[total['recording'] == recording]
    sample_art = sample.ix[subset['specimen'] == 'Arteri...']
    x = sample_art['DCO2_corr']
    y = sample_art['pCO2']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize  = 14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - leak<10%' , fontsize = 14)
    
sample2 = total.ix[total['specimen'] == 'Arteri...']
a = sample2['DCO2_corr']
b = sample2['pCO2']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})

fig.savefig('%s/E_figure_4_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Supplementary Figure 5


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 5', size = 14)

subset = DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] >= 10]
for i, recording in enumerate(recordings):
    total = subset
    sample = total.ix[total['recording'] == recording]
    sample_art = sample.ix[subset['specimen'] == 'Arteri...']
    x = sample_art['DCO2_corr']
    y = sample_art['pCO2']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize  = 14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - leak>10%' , fontsize = 14)
    
sample2 = total.ix[total['specimen'] == 'Arteri...']
a = sample2['DCO2_corr']
b = sample2['pCO2']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})

fig.savefig('%s/E_figure_5_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Supplementary Figure 6


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 6', size = 14)

for i, recording in enumerate(recordings):
    total = VThf_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    x = sample['mean']
    y = sample['pCO2, POC']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'VThf (mL/kg)', fontsize  = 14)
plt.xlim([0,5])
# plt.title(r'Tidal volume' , fontsize = 14)
    
a = VThf_stats_gases_all[interval, offset]['mean']
b = VThf_stats_gases_all[interval, offset]['pCO2, POC']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})

fig.savefig('%s/E_figure_6_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()

Supplementary Figure 7


In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 7', size = 14)

for i, recording in enumerate(recordings):
    total = VThf_sq_stats_gases_all[interval, offset]
    sample = total.ix[total['recording'] == recording]
    x = sample['mean']
    y = sample['pCO2, POC']
    fig.add_subplot(1,1,1);
    plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
    
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize  = 14)
plt.xlabel(r'VThf (mL$^2$/kg$^2$)', fontsize  = 14)
plt.xlabel(r'VThf$^2$ (mL$^2$/kg$^2$)', fontsize  = 14)
    
a = VThf_sq_stats_gases_all[interval, offset]['mean']
b = VThf_sq_stats_gases_all[interval, offset]['pCO2, POC']
    
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
    
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')

# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)

# print the equation on the graph area 
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.43, b.max()*0.83, text, color = 'black', style='normal', fontsize=14, 
         bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})

fig.savefig('%s/E_Figure_7_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format = 'tiff',
        transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)

plt.close()