This file contains the code used for data processing, statistical analysis and visualization described in the following paper: "Weight-correction of carbon dioxide diffusion coefficient (DCO2) reduces its inter-individual variability and improves its correlation with blood carbon dioxide levels in neonates receiving high-frequency oscillatory ventilation." Pediatric Pulmonology, 2017;1–7. https://doi.org/10.1002/ppul.23759
Authors: Gusztav Belteki MD, PhD, FRCPCH, Benjamin Lin BA, Colin Morley MD, FRCPCH
Contact: gbelteki@aol.com
The outputs (numbers, tables, graphs) of this Notebook have been suppresses to comply with copyrights. The corresponding data and graphs can be found in the paper.
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import operator
import warnings
from pandas import Series, DataFrame
from scipy.stats.stats import pearsonr
from scipy.stats import ttest_rel
from datetime import datetime, timedelta
from pprint import pprint
%matplotlib inline
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 50)
warnings.simplefilter('ignore')
In [2]:
# Directory to read the data from
%cd /Users/guszti/ventilation_data
cwd = os.getcwd()
In [3]:
# Directory to write the data to
dir_write = '/Users/guszti/ventilation_data/Analyses/DCO2/revision'
In [4]:
# Part or all of these recordings is HFOV
recordings = [ 'DG005_1', 'DG005_2', 'DG005_3', 'DG006_1', 'DG009', 'DG016', 'DG017', 'DG018_1',
'DG020', 'DG022', 'DG025', 'DG027', 'DG032_2', 'DG038_1', 'DG038_2', 'DG040_1', 'DG040_2', 'DG046_1']
In [5]:
clinical_details = {}
current_weights = {}
blood_gases = {}
pCO2s = {}
sedation = {}
vent_modes = {}
vent_modes_selected = {} # only important mode parameters are kept in this one
vent_settings = {}
vent_settings_selected = {} # only important mode parameters are kept in this one
slow_measurements = {} # 'slow measurements' are the ventilator parameters obtained with 1/sec frequeency
slow_measurements_hfov = {} # this only contains those parts of 'slow measurements' which do have a DCO2 reading
# and therefore are hfov ventilation periods
In [6]:
# Analysing DCO2s over "interval" minutes stopping "offset" minutes prior to the time of the blood gas
interval = 10 # 10 minutes
offset = 2 # 2 minutes
In [7]:
# color and marker set for visualization
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'magenta', 'maroon', 'navy', 'salmon', 'silver', 'tomato',
'violet', 'orchid', 'peru', 'wheat', 'lime', 'limegreen']
markers = [".", "+", ",", "x", "o", "D", "d", "8", "s", "p", "*", "h", 0, 4, "<", "3",
1, 5, ">", "4", 2, 6, "^", "2", 3, 7, "v", "1"]
In [8]:
for recording in recordings:
clinical_details[recording] = pd.read_excel('%s/%s' % (cwd, 'data_grabber_patient_data.xlsx'),
sheetname = recording[:5])
In [9]:
# If there are multiple recordings for the same patient this imports the weight relevant for the given recording
for recording in recordings:
w = 1 if len(recording) == 5 else int(recording[-1])
current_weights[recording] = clinical_details[recording].ix[4, w]/1000
In [10]:
def data_loader(lst):
'''
lst -> DataFrame
- Takes a list of csv files (given as filenames with absolute paths) and import them as a dataframes
- Combines the 'Date' and 'Time' columns into a Datetime index while keeping the original columns
- If there are more files it concatenates them into a single dataframe.
- It returns the concatenated data as one DataFrame.
'''
data = []
for i, filename in enumerate(lst):
data.append(pd.read_csv(lst[i], keep_date_col = 'True', parse_dates = [['Date', 'Time']]))
data = pd.concat(data)
data.index = data.Date_Time
return data
In [11]:
def slow_setting_finder(lst):
'''
list -> list
Takes a list of filenames and returns those ones that end with '_slow_Setting.csv'
'''
return [n for n in lst if n.endswith('_slow_Setting.csv')]
In [12]:
def slow_text_finder(lst):
'''
list -> list
Takes a list of filenames and returns those ones that end with '_slow_Text.csv'
'''
return [n for n in lst if n.endswith('_slow_Text.csv')]
In [13]:
for recording in recordings:
flist = os.listdir('%s/%s' % (cwd, recording))
files = slow_setting_finder(flist)
# print('Loading recording %s' % recording)
# print(files)
fnames = ['%s/%s/%s' % (cwd, recording, filename) for filename in files]
vent_settings[recording] = data_loader(fnames)
In [14]:
# remove less important ventilator settings to simplify the table
for recording in recordings:
a = vent_settings[recording][vent_settings[recording].Id != 'FiO2']
a = a[a.Id != 'Ø tube']
a = a[a.Id != 'Tapn']
a = a[a.Id != 'ΔPsupp']
a = a[a.Id != 'Tube Æ']
a = a[a.Id != 'RRsigh']
a = a[a.Id != 'Psigh']
a = a[a.Id != 'Slopesigh']
a = a[a.Unit != 'L']
a = a[a.Id != 'I (I:E)']
a = a[a.Id != 'E (I:E)']
a = a[a.Id != 'MVlow delay']
a = a[a.Id != 'MVhigh delay']
a.drop_duplicates(["Rel.Time [s]", "Name"], inplace = True)
vent_settings_selected[recording] = a
In [15]:
for recording in recordings:
flist = os.listdir('%s/%s' % (cwd, recording))
files = slow_text_finder(flist)
# print('Loading recording %s' % recording)
# print(files)
fnames = ['%s/%s/%s' % (cwd, recording, filename) for filename in files]
vent_modes[recording] = data_loader(fnames)
In [16]:
# remove less important ventilator mode settings to simplify the table
for recording in recordings:
a = vent_modes[recording][vent_modes[recording].Id != 'Device is in neonatal mode']
a = a[a.Id != 'Device is in neonatal mode']
a = a[a.Id != 'Selected CO2 unit is mmHg']
a = a[a.Id != "Selected language (variable text string transmitted with this code, see 'MEDIBUS.X, Rules and Standards for Implementation' for further information)"]
a = a[a.Id != 'Device configured for intubated patient ventilation']
a = a[a.Id != 'Active humid heated']
a = a[a.Id != 'Audio pause active']
a = a[a.Id != 'Active humid unheated']
a = a[a.Id != 'Suction maneuver active']
a.drop_duplicates(["Rel.Time [s]", "Id"], inplace = True)
vent_modes_selected[recording] = a
In [ ]:
def slow_measurement_finder(lst):
'''
list -> list
Takes a list of filenames and returns those ones that end with 'Measurement.csv''
'''
return [n for n in lst if n.endswith('_slow_Measurement.csv')]
In [ ]:
for recording in recordings:
flist = os.listdir('%s/%s' % (cwd, recording))
files = slow_measurement_finder(flist)
# print('Loading recording %s' % recording)
# print(files)
fnames = ['%s/%s/%s' % (cwd, recording, filename) for filename in files]
slow_measurements[recording] = data_loader(fnames)
In [ ]:
# resampling to adjust the time stamps to full seconds
for recording in recordings:
slow_measurements[recording] = slow_measurements[recording].resample('1S').mean()
In [ ]:
# Remove rows which have no DCO2 data - this keeps HFOV periods only
for recording in recordings:
try:
slow_measurements_hfov[recording] = slow_measurements[recording].dropna(subset = ['5001|DCO2 [10*mL^2/s]'])
except KeyError:
print('%s cannot be converted' % recording )
In [ ]:
for recording in recordings:
# Create a column in the DataFrame with the DCO2 data. The original values need to be multiplied by 10
# as in the the downloaded data they are expressed as 1/10th of the DCO2 readings (see original column labels)
slow_measurements_hfov[recording]['DCO2'] = \
DataFrame(slow_measurements_hfov[recording]['5001|DCO2 [10*mL^2/s]'] * 10)
# this normalizes the DCO2 to the body weight. DCO2 = f x VT x VT. If VT is expressed as mL/kg then DCO2_corr
# should be 1/sec x mL/kg x mL/kg = mL^2 / kg^2 / sec
slow_measurements_hfov[recording]['DCO2_corr'] = \
DataFrame(slow_measurements_hfov[recording]['5001|DCO2 [10*mL^2/s]'] * 10 / (current_weights[recording] ** 2))
# This column normalizes the VThf to body weight
slow_measurements_hfov[recording]['VThf_kg'] = \
DataFrame(slow_measurements_hfov[recording]['5001|VThf [mL]'] / current_weights[recording])
# This column normalizs the VThf to body weight squares
slow_measurements_hfov[recording]['VThf_kg_2'] = slow_measurements_hfov[recording]['VThf_kg']**2
In [ ]:
for recording in recordings:
print('%s is being pickled' % recording)
slow_measurements_hfov[recording].to_pickle('%s/%s_slow_measurements_hfov.p' % (dir_write, recording))
In [ ]:
# importing from pickles
for recording in recordings:
slow_measurements_hfov[recording] = pd.read_pickle('%s/%s_slow_measurements_hfov.p' % (dir_write, recording))
In [ ]:
HFOV_periods = {}
for recording in recordings:
start = slow_measurements_hfov[recording].index[0]
end = slow_measurements_hfov[recording].index[-1]
HFOV_periods[recording] = [start, end]
In [ ]:
# How long are the HFOV periods (in seconds) ?
hfov_durations = {}
for recording in recordings:
hfov_durations[recording] = HFOV_periods[recording][1] - HFOV_periods[recording][0]
In [ ]:
def stats_calculator(obj, col):
'''
in: obj (Series or DataFrame), col (str)
out: tuple of size 8
Calculates 8 different descriptive statistics for the data in the 'col' column of a series or dataframe
'''
a = obj[col]
return (a.size, a.mean(), a.std(), a.median(), a.mad(),
a.min(), a.quantile(0.25), a.quantile(0.75), a.max())
In [ ]:
# Descriptive statistics on uncorrected DCO2 data
DCO2_stats_all = []
for recording in recordings:
stats = (stats_calculator(slow_measurements_hfov[recording], 'DCO2'))
frame = DataFrame([stats], columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad',
'min', '25pc', '75pc', 'max', ], index = [recording])
DCO2_stats_all.append(frame)
DCO2_stats_all = pd.concat(DCO2_stats_all)
DCO2_stats_all['hours'] = DCO2_stats_all['number_of_seconds'] / 3600
In [ ]:
# Descriptive statistics on weight-squared-corrected DCO2 data
DCO2_corr_stats_all = []
for recording in recordings:
stats = (stats_calculator(slow_measurements_hfov[recording], 'DCO2_corr'))
frame = DataFrame([stats], columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad',
'min', '25pc', '75pc', 'max', ], index = [recording])
DCO2_corr_stats_all.append(frame)
DCO2_corr_stats_all = pd.concat(DCO2_corr_stats_all)
In [ ]:
for recording in recordings:
blood_gases[recording] = pd.read_excel('%s/%s' % (cwd, 'data_grabber_gases.xlsx'),
sheetname = recording[:5], header = None)
blood_gases[recording] = DataFrame(blood_gases[recording].T)
blood_gases[recording].columns = blood_gases[recording].ix[0]
blood_gases[recording] = blood_gases[recording][1:]
blood_gases[recording].index = [blood_gases[recording]['Date:'], blood_gases[recording]['Time:']]
In [ ]:
pCO2s = {}
for recording in recordings:
pCO2s[recording] = blood_gases[recording][['pCO2, POC', 'Blood specimen type, POC']]
In [ ]:
# Change the index of pCO2s into single index format
for recording in recordings:
# print('processing CO2 data for %s' % recording)
time_list_all = []
for i in range(len(pCO2s[recording])):
day = str(pCO2s[recording].index[i][0])[:10]
time = str(pCO2s[recording].index[i][1])
date_time = day + ' ' + time
time_list_all.append(date_time)
pCO2s[recording].index = time_list_all
In [ ]:
# Convert the indices of the pCO2s DataFrames to datetime index
for recording in recordings:
try:
pCO2s[recording].index = pd.to_datetime(pCO2s[recording].index)
except:
print('%s cannot be converted' % recording)
In [ ]:
gas_list = {}
In [ ]:
gas_list['DG005_1'] = ['2015-10-13 15:30:00', '2015-10-13 17:38:00', '2015-10-13 21:33:00', '2015-10-13 23:05:00',
'2015-10-14 01:29:00', '2015-10-14 03:56:00', '2015-10-14 07:02:00', '2015-10-14 11:12:00', '2015-10-14 13:10:00',
'2015-10-14 14:07:00', '2015-10-14 14:31:00', '2015-10-14 16:32:00', '2015-10-14 19:08:00', '2015-10-14 22:42:00',
'2015-10-15 00:01:00', '2015-10-15 02:44:00', '2015-10-15 03:57:00', '2015-10-15 05:21:00', '2015-10-15 07:45:00',
'2015-10-15 11:56:00' ]
In [ ]:
gas_list['DG005_2'] = ['2015-10-24 12:33:00', '2015-10-24 13:47:00', ]
In [ ]:
gas_list['DG005_3'] = ['2015-11-04 04:09:00', '2015-11-04 07:24:00', ]
In [ ]:
gas_list['DG006_1'] = [ '2015-10-15 17:45:00', '2015-10-15 20:12:00', '2015-10-15 21:12:00', '2015-10-15 22:30:00',
'2015-10-16 01:13:00', '2015-10-16 03:28:00', '2015-10-16 05:07:00', '2015-10-16 07:17:00',
'2015-10-16 10:59:00', '2015-10-16 15:34:00']
In [ ]:
gas_list['DG009'] = [ '2015-10-26 17:54:00', '2015-10-26 20:08:00', '2015-10-26 21:33:00', '2015-10-27 01:08:00',
'2015-10-27 06:34:00', '2015-10-27 11:56:00', '2015-10-27 13:47:00', '2015-10-27 21:06:00',
'2015-10-28 02:49:00', '2015-10-28 07:04:00', '2015-10-28 09:44:00']
In [ ]:
gas_list['DG016'] = [ '2015-12-07 14:35:00', '2015-12-07 16:51:00']
In [ ]:
gas_list['DG017'] = ['2015-12-08 12:58:00', '2015-12-08 16:44:00', '2015-12-08 19:33:00',
'2015-12-08 22:51:00', '2015-12-08 23:27:00', '2015-12-08 23:34:00',
'2015-12-09 02:06:00', '2015-12-09 03:48:00', '2015-12-09 05:41:00',
'2015-12-09 07:49:00', '2015-12-09 07:56:00', '2015-12-09 09:12:00',
'2015-12-09 09:19:00', '2015-12-09 12:27:00', '2015-12-09 16:37:00',
'2015-12-09 17:39:00', '2015-12-09 20:50:00', '2015-12-10 01:04:00',
'2015-12-10 03:35:00', '2015-12-10 03:37:00', '2015-12-10 04:33:00',
'2015-12-10 10:41:00', '2015-12-10 10:48:00', '2015-12-10 12:29:00',
'2015-12-10 14:20:00', '2015-12-10 17:02:00', '2015-12-10 22:02:00',
'2015-12-11 03:04:00', '2015-12-11 03:12:00', '2015-12-11 06:16:00',
'2015-12-11 06:25:00', '2015-12-11 06:59:00', '2015-12-11 10:21:00',
'2015-12-11 14:16:00']
In [ ]:
gas_list['DG018_1'] = ['2015-12-13 01:27:00',
'2015-12-13 03:43:00', '2015-12-13 05:20:00', '2015-12-13 07:05:00',
'2015-12-13 08:46:00', '2015-12-13 10:55:00', '2015-12-13 11:40:00',
'2015-12-13 12:50:00', '2015-12-13 14:50:00', '2015-12-13 16:50:00',
'2015-12-13 19:40:00', '2015-12-13 22:22:00', '2015-12-14 00:19:00',
'2015-12-14 01:33:00', '2015-12-14 04:37:00', '2015-12-14 06:55:00',
'2015-12-14 09:59:00', '2015-12-14 11:23:00', '2015-12-14 13:07:00']
In [ ]:
gas_list['DG020'] = [ '2015-12-29 11:52:00', '2015-12-29 15:36:00', '2015-12-29 19:13:00', '2015-12-29 22:23:00',
'2015-12-30 02:10:00', '2015-12-30 03:54:00', '2015-12-30 05:43:00', '2015-12-30 07:46:00',
'2015-12-30 09:06:00', '2015-12-30 13:07:00']
In [ ]:
gas_list['DG022'] = ['2016-01-02 12:43:00', '2016-01-02 15:58:00', '2016-01-02 19:46:00',
'2016-01-02 21:41:00', '2016-01-02 22:43:00', '2016-01-03 00:01:00',
'2016-01-03 01:15:00', '2016-01-03 02:14:00', '2016-01-03 03:05:00',
'2016-01-03 03:59:00', '2016-01-03 05:34:00', '2016-01-03 07:37:00',
'2016-01-03 13:28:00', '2016-01-03 18:54:00', '2016-01-03 22:44:00',
'2016-01-05 02:50:00', '2016-01-05 07:09:00', '2016-01-05 14:08:00',
'2016-01-05 23:31:00']
In [ ]:
gas_list['DG025'] = [ '2016-01-21 23:46:00',
'2016-01-22 03:31:00', '2016-01-22 06:27:00', '2016-01-22 09:06:00',
'2016-01-22 11:09:00', '2016-01-22 12:35:00', '2016-01-22 14:14:00',
'2016-01-22 18:03:00', '2016-01-22 22:12:00', '2016-01-23 01:24:00',
'2016-01-23 04:24:00', '2016-01-23 08:46:00', '2016-01-23 10:17:00',
'2016-01-23 14:47:00', '2016-01-23 19:22:00', '2016-01-23 21:41:00',
'2016-01-24 00:45:00', '2016-01-24 07:50:00', '2016-01-24 10:59:00',]
In [ ]:
gas_list['DG027'] = ['2016-01-29 10:48:00', '2016-01-29 11:56:00', '2016-01-29 14:34:00']
In [ ]:
gas_list['DG032_2'] = ['2016-03-22 13:22:00',
'2016-03-22 21:07:00', '2016-03-23 02:17:00', '2016-03-23 11:19:00',
'2016-03-23 18:46:00', '2016-03-23 21:33:00', '2016-03-24 00:15:00',
'2016-03-24 02:13:00', '2016-03-24 06:15:00', '2016-03-24 09:08:00',
'2016-03-24 13:20:00', '2016-03-24 18:19:00', '2016-03-25 01:41:00',
'2016-03-25 03:29:00', '2016-03-25 10:46:00', '2016-03-25 17:44:00',
'2016-03-26 02:54:00', '2016-03-26 03:34:00', '2016-03-26 04:15:00',
'2016-03-26 05:46:00', '2016-03-26 09:02:00']
In [ ]:
gas_list['DG038_1'] = ['2016-05-06 10:34:00', '2016-05-06 14:25:00',
'2016-05-06 17:58:00', '2016-05-06 19:34:00', '2016-05-06 21:22:00',
'2016-05-06 23:36:00', '2016-05-07 03:11:00', '2016-05-07 05:45:00',
'2016-05-07 10:13:00', '2016-05-07 12:45:00', '2016-05-07 16:33:00',
'2016-05-07 19:01:00', '2016-05-08 00:19:00', '2016-05-08 03:54:00',
'2016-05-08 09:13:00', '2016-05-08 13:41:00', '2016-05-08 19:30:00',
'2016-05-08 22:35:00', '2016-05-09 03:53:00', '2016-05-09 10:19:00',
'2016-05-09 11:59:00', '2016-05-09 18:45:00', '2016-05-09 21:53:00',
'2016-05-10 03:06:00', '2016-05-10 07:38:00', '2016-05-10 12:08:00',
'2016-05-10 13:51:00', '2016-05-10 13:59:00', '2016-05-10 17:35:00',
'2016-05-10 23:06:00', '2016-05-11 05:13:00', '2016-05-11 10:03:00',]
In [ ]:
gas_list['DG038_2'] =['2016-05-17 18:01:00',
'2016-05-18 01:14:00', '2016-05-18 07:04:00', '2016-05-18 11:49:00',
'2016-05-18 17:54:00', '2016-05-19 03:08:00', '2016-05-19 12:57:00',
'2016-05-19 17:57:00', '2016-05-20 03:34:00', '2016-05-20 09:09:00',
'2016-05-20 17:20:00', '2016-05-21 00:46:00', '2016-05-21 01:57:00',
'2016-05-21 05:34:00', '2016-05-21 09:35:00', '2016-05-21 14:07:00',
'2016-05-21 18:18:00', '2016-05-21 21:35:00', '2016-05-22 02:25:00',
'2016-05-22 09:43:00']
In [ ]:
gas_list['DG040_1'] = ['2016-06-09 16:24:00',
'2016-06-09 16:57:00', '2016-06-09 19:15:00', '2016-06-09 21:51:00',
'2016-06-09 23:12:00', '2016-06-09 23:21:00', '2016-06-09 23:32:00',
'2016-06-10 01:17:00', '2016-06-10 03:28:00', '2016-06-10 07:36:00',
'2016-06-10 11:34:00', '2016-06-10 11:42:00', '2016-06-10 18:03:00',
'2016-06-10 19:27:00', '2016-06-10 19:41:00', '2016-06-10 22:04:00',
'2016-06-11 02:53:00', '2016-06-11 11:17:00', '2016-06-11 15:45:00',
'2016-06-11 16:47:00']
In [ ]:
gas_list['DG040_2'] = ['2016-06-21 14:17:00', '2016-06-21 18:55:00',
'2016-06-21 23:24:00', '2016-06-22 07:03:00', '2016-06-22 09:55:00',
'2016-06-22 13:36:00', '2016-06-22 13:47:00', '2016-06-22 17:59:00',
'2016-06-22 22:16:00', '2016-06-23 00:36:00', '2016-06-23 05:34:00',
'2016-06-23 09:41:00', '2016-06-23 14:09:00', '2016-06-23 19:05:00',
'2016-06-23 23:00:00', '2016-06-24 04:57:00', '2016-06-24 09:25:00',
'2016-06-24 11:27:00', '2016-06-24 13:33:00', '2016-06-24 19:39:00',
'2016-06-24 22:51:00', '2016-06-24 01:17:00', '2016-06-24 07:24:00']
In [ ]:
gas_list['DG046_1'] = ['2016-07-10 20:40:00',
'2016-07-10 21:45:00', '2016-07-10 23:12:00', '2016-07-11 00:26:00',
'2016-07-11 03:11:00', '2016-07-11 05:43:00', '2016-07-11 07:17:00',
'2016-07-11 09:04:00', '2016-07-11 16:58:00', '2016-07-11 18:18:00',
'2016-07-11 21:44:00', '2016-07-12 01:08:00', '2016-07-12 06:55:00',
'2016-07-12 10:02:00', '2016-07-12 15:09:00']
In [ ]:
# Only keep those pCO2 data when there is HFOV recording
for recording in recordings:
start = gas_list[recording][0]
end = gas_list[recording][-1]
pCO2s[recording] = pCO2s[recording].ix[start:end]
In [ ]:
def DCO2_calculator(recording, column, time, interval, offset, ):
'''
in:
-recording: recording name (string)
-column: column to calculate the statistics on, e.g. DCO2 or DCO2_corr
-time: time of blood gas (string)
-interval: time in minutes (int)
-offset: time in minutes (int)
out:
tuple of 9 values:
- number of DCO2 measurements over the period
- mean
- st deviation
- median
- mad (mean absolute deviation)
- minimum
- 25th centile
- 75th centile
- maximum
Calculates statistics about DCO2 values before a blood gas. The interval in 'interval' long (in minutes)
and it ends 'offset' minutes before the blood gas
'''
#time is given in nanoseconds: 1 min = 60000000000 nanoseconds
end = pd.to_datetime(time) - pd.to_timedelta(offset * 60000000000)
start = end - pd.to_timedelta(interval * 60000000000)
# 1 sec (1000000000 nsec) needs to be taken away otherwise the period will be 901 seconds
# as it includes the last second
data = slow_measurements_hfov[recording][start : end - pd.to_timedelta(1000000000)]
stats = stats_calculator(data, column)
return stats
In [ ]:
# Create a dictionary of DCO2 calculations
DCO2_stats_gases = {}
DCO2_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]
for recording in recordings:
lst = []
for gas in gas_list[recording]:
lst.append(DCO2_calculator(recording,'DCO2', gas, interval, offset))
stats = DataFrame(lst, columns = columns, index = gas_list[recording])
DCO2_stats_gases[interval, offset][recording] = stats
# add pCO2 data
DCO2_stats_gases[interval, offset][recording] = \
DCO2_stats_gases[interval, offset][recording].join(pCO2s[recording])
# Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
# Some rows do not have DCO2 values because the baby received conventional ventilation
# during this period (flanked by HFOV periods)
# Some rows do not have pCO2 data because the blood gas was insufficient
DCO2_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
, inplace = True)
In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_stats_gases', interval, offset) )
for recording in recordings:
DCO2_stats_gases[interval, offset][recording].to_excel(writer, recording)
writer.save()
In [ ]:
# Create a dictionary of weight-squared corrected DCO2 calculations
DCO2_corr_stats_gases = {}
DCO2_corr_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]
for recording in recordings:
lst = []
for gas in gas_list[recording]:
lst.append(DCO2_calculator(recording,'DCO2_corr', gas, interval, offset))
stats = DataFrame(lst, columns = columns, index = gas_list[recording])
DCO2_corr_stats_gases[interval, offset][recording] = stats
# add pCO2 data
DCO2_corr_stats_gases[interval, offset][recording] = \
DCO2_corr_stats_gases[interval, offset][recording].join(pCO2s[recording])
# Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
# Some rows do not have DCO2 values because the baby received conventional ventilation
# during this period (flanked by HFOV periods)
# Some rows do not have pCO2 data because the blood gas was insufficient
DCO2_corr_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
, inplace = True)
In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_corr_stats_gases', interval, offset) )
for recording in recordings:
DCO2_corr_stats_gases[interval, offset][recording].to_excel(writer, recording)
writer.save()
In [ ]:
DCO2_stats_gases_all = {}
lst = []
for recording in recordings:
DCO2_stats_gases[interval, offset][recording]['recording'] = recording
lst.append(DCO2_stats_gases[interval, offset][recording])
DCO2_stats_gases_all[interval, offset] = pd.concat(lst)
In [ ]:
DCO2_stats_gases_all[interval, offset];
In [ ]:
DCO2_corr_stats_gases_all = {}
lst = []
for recording in recordings:
DCO2_corr_stats_gases[interval, offset][recording]['recording'] = recording
lst.append(DCO2_corr_stats_gases[interval, offset][recording])
DCO2_corr_stats_gases_all[interval, offset] = pd.concat(lst)
In [ ]:
DCO2_corr_stats_gases_all[interval, offset];
In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_stats_all_gases', interval, offset) )
DCO2_stats_gases_all[interval, offset].to_excel(writer, 'DCO2')
writer.save()
In [ ]:
writer = pd.ExcelWriter('%s/%s_%d_%d.xlsx' % (dir_write, 'DCO2_corr_stats_all_gases', interval, offset) )
DCO2_corr_stats_gases_all[interval, offset].to_excel(writer, 'DCO2_corr')
writer.save()
In [ ]:
DCO2_stats_gases_all[interval, offset].to_pickle('%s/%s_%d_%d.p'
% (dir_write, 'DCO2_stats_all_gases', interval, offset))
In [ ]:
# Create a dictionary of DCO2 calculations
VThf_stats_gases = {}
VThf_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]
for recording in recordings:
lst = []
for gas in gas_list[recording]:
lst.append(DCO2_calculator(recording,'VThf_kg', gas, interval, offset))
stats = DataFrame(lst, columns = columns, index = gas_list[recording])
VThf_stats_gases[interval, offset][recording] = stats
# add pCO2 data
VThf_stats_gases[interval, offset][recording] = \
VThf_stats_gases[interval, offset][recording].join(pCO2s[recording])
# Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
# Some rows do not have DCO2 values because the baby received conventional ventilation
# during this period (flanked by HFOV periods)
# Some rows do not have pCO2 data because the blood gas was insufficient
VThf_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
, inplace = True)
In [ ]:
VThf_stats_gases_all = {}
lst = []
for recording in recordings:
VThf_stats_gases[interval, offset][recording]['recording'] = recording
lst.append(VThf_stats_gases[interval, offset][recording])
VThf_stats_gases_all[interval, offset] = pd.concat(lst)
In [ ]:
# Create a dictionary of DCO2 calculations
VThf_sq_stats_gases = {}
VThf_sq_stats_gases[interval, offset] = {}
columns = ['number_of_seconds', 'mean', 'stdev', 'median', 'mad', 'min', '25pc', '75pc', 'max', ]
for recording in recordings:
lst = []
for gas in gas_list[recording]:
lst.append(DCO2_calculator(recording,'VThf_kg_2', gas, interval, offset))
stats = DataFrame(lst, columns = columns, index = gas_list[recording])
VThf_sq_stats_gases[interval, offset][recording] = stats
# add pCO2 data
VThf_sq_stats_gases[interval, offset][recording] = \
VThf_sq_stats_gases[interval, offset][recording].join(pCO2s[recording])
# Dropping the rows from the DataFrame that do not have DCO2 or pCO2 values
# Some rows do not have DCO2 values because the baby received conventional ventilation
# during this period (flanked by HFOV periods)
# Some rows do not have pCO2 data because the blood gas was insufficient
VThf_sq_stats_gases[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['mean', 'pCO2, POC']
, inplace = True)
In [ ]:
VThf_sq_stats_gases_all = {}
lst = []
for recording in recordings:
VThf_sq_stats_gases[interval, offset][recording]['recording'] = recording
lst.append(VThf_sq_stats_gases[interval, offset][recording])
VThf_sq_stats_gases_all[interval, offset] = pd.concat(lst)
In [ ]:
def parameter_calculator(recording, parameter, time, interval, offset, ):
'''
in:
-recording: recording name (string)
-parameter: parameter to calculate the mean on, e.g. DCO2 or DCO2_corr
-time: time of blood gas (string)
-interval: time in minutes (int)
-offset: time in minutes (int)
out:
-
Calculates the mean value of the parameter before a blood gas.
The interval in 'interval' long (in minutes) and it ends 'offset' minutes before the blood gas
'''
# time is given in nanoseconds: 1 min = 60000000000 nanoseconds
end = pd.to_datetime(time) - pd.to_timedelta(offset * 60000000000)
start = end - pd.to_timedelta(interval * 60000000000)
# 1 sec (1000000000 nsec) needs to be taken away otherwise the period will be 901 seconds
# as it includes the last second
data = slow_measurements_hfov[recording][start : end - pd.to_timedelta(1000000000)]
return data[parameter].mean()
In [ ]:
# Create a dictionary of Vthf, DCO2 calculations together with the leak data
DCO2_stats_gases_leak = {}
DCO2_stats_gases_leak[interval, offset] = {}
for recording in recordings:
columns = ['DCO2', 'DCO2_corr', '5001|% leak [%]', '5001|MVleak [L/min]', '5001|MVi [L/min]',
'5001|MVe [L/min]']
par_dict = {}
for gas in gas_list[recording]:
lst = []
lst.append(parameter_calculator(recording,'DCO2', gas, interval, offset))
lst.append(parameter_calculator(recording,'DCO2_corr', gas, interval, offset))
lst.append(parameter_calculator(recording,'5001|% leak [%]', gas, interval, offset))
lst.append(parameter_calculator(recording,'5001|MVleak [L/min]', gas, interval, offset))
lst.append(parameter_calculator(recording,'5001|MVi [L/min]', gas, interval, offset))
lst.append(parameter_calculator(recording,'5001|MVe [L/min]', gas, interval, offset))
par_dict[gas] = lst
DCO2_stats_gases_leak[interval, offset][recording] = \
DataFrame(par_dict).T
DCO2_stats_gases_leak[interval, offset][recording].columns = columns
# add pCO2 data
DCO2_stats_gases_leak[interval, offset][recording] = \
DCO2_stats_gases_leak[interval, offset][recording].join(pCO2s[recording])
# Dropping the rows from the DataFrame that do not have pCO2 values
# Some rows do not have pCO2 data because the blood gas was insufficient
DCO2_stats_gases_leak[interval, offset][recording].dropna(axis = 0, how = 'any', subset = ['pCO2, POC']
, inplace = True)
In [ ]:
DCO2_stats_gases_leak_all = {}
lst = []
for recording in recordings:
DCO2_stats_gases_leak[interval, offset][recording]['recording'] = recording
lst.append(DCO2_stats_gases_leak[interval, offset][recording])
DCO2_stats_gases_leak_all[interval, offset] = pd.concat(lst)
DCO2_stats_gases_leak_all[interval, offset].columns = \
['DCO2', 'DCO2_corr', 'leak%', 'MV_leak', 'MVi', 'MVe', 'pCO2', 'specimen', 'recording']
In [ ]:
# Mean leak more that 5% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 5]);
In [ ]:
# Mean leak more that 10% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 10]);
In [ ]:
# Mean leak more that 25% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 25]);
In [ ]:
# Mean leak more that 50% before the blood gas
len(DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] > 50]);
In [ ]:
def correl(x, y):
'''
input: two numeric arrays of the same size
returns: a tuple of
1. Pearson's correlation coefficient: r
2. low and high 95% confidence intervals or r (two values)
3. Coefficient of determination: r^2
4. p-value of correlation
'''
assert len(x) == len(y)
r, p = pearsonr(x, y)
f = 0.5*np.log((1+r)/(1-r))
se = 1/np.sqrt(len(x)-3)
ucl = f + 1.96 * se
lcl = f - 1.96 * se
lcl = (np.exp(2*lcl) - 1) / (np.exp(2*lcl) + 1)
ucl = (np.exp(2*ucl) - 1) / (np.exp(2*ucl) + 1)
return r , lcl, ucl , r*r, p
In [ ]:
def visualise_DCO2_corr(rec, i, xlimoffset = 20, ylimoffset = 2, textboxoffset_x = 0.55, textboxoffset_y = 0.9):
'''
input:
- rec = list of recordings (list)
- xlimoffset = how much longer is the X-axis than the highest X value (int)
- ylimoffset = how much longer is the y-axis than the highest y value (int)
- textboxoffset_x = vertical position of the textbox compared to X-lim, fraction (float)
- textboxoffset_y = horizontal position of the textbox compared to y-lim, fraction (float)
Draws a pCO2-DCO2_corr graph using data in recordings 'rec'. Points belonging to different recordings
have different markers and colors.
A correlation line is also drawn.
It also calculates Spearman's correlation coefficient with confidence intervals and p-value.
Puts these data on the graph.
'''
total = DCO2_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
x = sample['mean']
y = sample['pCO2, POC']
if len(x) == 0:
return
ax = fig.add_subplot(len(rec),1,i+1);
plt.scatter(x , y, color = 'black', marker = markers[0], s = 60)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize = 14)
plt.title(r'%s: Weight-corrected DCO$ \rm _2$' % recording , fontsize = 14)
xlim = ax.get_xlim()[1]
ylim = ax.get_ylim()[1]
plt.xlim([0, xlim + xlimoffset])
plt.ylim([0, ylim + ylimoffset])
# Polynomial Coefficients
coeffs = np.polyfit(x, y, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(x,l(x),'r--')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(x,y)
p = round(p, 4)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.4f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(xlim * textboxoffset_x, ylim * textboxoffset_y, text, color = 'black', style='normal', fontsize=15,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
text2 = 'y=%.4fx+(%.4f) r=%.4f (%.4f , %.4f) p=%.4f' % (result[0], result[1], r, lcl, ucl, p)
print(text2)
fig.savefig('%s/%s_DCO2_corr.pdf' % (dir_write, recording), dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'pdf',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
In [ ]:
def visualise_DCO2_corr_art(rec, i, xlimoffset = 20, ylimoffset = 2, textboxoffset_x = 0.55, textboxoffset_y = 0.9):
'''
input:
- rec = list of recordings (list)
- xlimoffset = how much longer is the X-axis than the highest X value (int)
- ylimoffset = how much longer is the y-axis than the highest y value (int)
- textboxoffset_x = vertical position of the textbox compared to X-lim, fraction (float)
- textboxoffset_y = horizontal position of the textbox compared to y-lim, fraction (float)
Draws a pCO2-DCO2_corr graph using data in recordings 'rec'. Points belonging to different recordings
have different markers and colors. THIS FUNCTION ONLY CONSIDERS ARTERIAL BLOOD GASES.
A correlation line is also drawn.
It also calculates Spearman's correlation coefficient with confidence intervals and p-value.
Puts these data on the graph.
'''
total = DCO2_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
sample_art = sample.ix[sample['Blood specimen type, POC'] == 'Arteri...']
x = sample_art['mean']
y = sample_art['pCO2, POC']
if len(x) == 0:
return
ax = fig.add_subplot(len(rec),1,i+1);
plt.scatter(x , y, color = 'black', marker = markers[0], s = 60)
plt.ylabel("pCO2 (kPa)", fontsize = 16)
plt.xlabel("DCO2 (ml^2/kg^2.sec)", fontsize = 16)
plt.title("%s: Weight-corrected DCO2 - arterial pCO2" % recording , fontsize = 18)
xlim = ax.get_xlim()[1]
ylim = ax.get_ylim()[1]
plt.xlim([0, xlim + xlimoffset])
plt.ylim([0, ylim + ylimoffset])
# Polynomial Coefficients
coeffs = np.polyfit(x, y, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(x,l(x),'r--')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(x,y)
p = round(p, 3)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(xlim * textboxoffset_x, ylim * textboxoffset_y, text, color = 'black', style='normal', fontsize=15,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
fig.savefig('%s/%s_DCO2_corr_art.pdf' % (dir_write, recording), dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'pdf',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
These graphs are shown in Supplementary Figure 3 of the paper.
In [ ]:
# Select only recordings where at least 10 blood gases are available
select_recs = [key for key in pCO2s if len(pCO2s[key]) >= 10]
select_recs = sorted(select_recs)
select_recs;
In [ ]:
recordings = select_recs[0:1]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 6, textboxoffset_x = 0.50, textboxoffset_y = 1.1)
plt.close()
In [ ]:
recordings = select_recs[1:2]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 4, textboxoffset_x = 0.66, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[2:3]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[3:4]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.5, textboxoffset_y = 1.1)
plt.close()
In [ ]:
recordings = select_recs[4:5]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[5:6]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.5, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[6:7]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.45, textboxoffset_y = 1)
plt.close()
In [ ]:
recordings = select_recs[7:8]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 1.2, textboxoffset_y = 1.3)
plt.close()
In [ ]:
recordings = select_recs[8:9]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.1)
plt.close()
In [ ]:
recordings = select_recs[9:10]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.1)
plt.close()
In [ ]:
recordings = select_recs[10:11]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.6, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[11:12]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.6, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[12:13]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.75, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs[13:]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr(recordings, i, xlimoffset = 20, ylimoffset = 5, textboxoffset_x = 0.55, textboxoffset_y = 1.1)
plt.close()
In [ ]:
# Select only recordings where at least 10 ARTERIAL blood gases are available
select_recs_art = [key for key in pCO2s if
len(pCO2s[key].ix[pCO2s[key]['Blood specimen type, POC'] == 'Arteri...']) >= 10]
select_recs_art = sorted(select_recs_art)
select_recs_art;
In [ ]:
recordings = select_recs_art[0:1]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5,
textboxoffset_x = 0.65, textboxoffset_y = 1.3)
plt.close()
In [ ]:
recordings = select_recs_art[1:2]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5,
textboxoffset_x = 0.5, textboxoffset_y = 1.2)
plt.close()
In [ ]:
recordings = select_recs_art[2:3]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5,
textboxoffset_x = 0.45, textboxoffset_y = 1)
plt.close()
In [ ]:
recordings = select_recs_art[3:4]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5,
textboxoffset_x = 1.3, textboxoffset_y = 1.25)
plt.close()
In [ ]:
recordings = select_recs_art[4:5]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5,
textboxoffset_x = 0.6, textboxoffset_y = 1.3)
plt.close()
In [ ]:
recordings = select_recs_art[5:6]
fig = plt.figure()
fig.set_size_inches(8,6)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
visualise_DCO2_corr_art(recordings, i, xlimoffset = 20, ylimoffset = 5,
textboxoffset_x = 0.6, textboxoffset_y = 1.2)
plt.close()
In [ ]:
val = range(0, 180, 5)
true_pos = []
false_pos = []
true_neg = []
false_neg = []
a = DCO2_stats_gases_leak_all[10,2]
for i in val:
pos = a[a['DCO2_corr'] >= i]
neg = a[a['DCO2_corr'] < i]
tp = len(pos[pos['pCO2'] <= 8])
fp = len(pos[pos['pCO2'] > 8])
tn = len(neg[neg['pCO2'] > 8])
fn = len(neg[neg['pCO2'] <= 8])
true_pos.append(tp)
false_pos.append(fp)
true_neg.append(tn)
false_neg.append(fn)
In [ ]:
DCO2_test = DataFrame({'tp': true_pos, 'fp': false_pos, 'tn': true_neg, 'fn': false_neg}, index = val )
In [ ]:
DCO2_test['sensitivity'] = round(DCO2_test.tp / (DCO2_test.tp + DCO2_test.fn), 3)
DCO2_test['specificity'] = round(DCO2_test.tn / (DCO2_test.tn + DCO2_test.fp), 3)
DCO2_test['pos_pred_value'] = round(DCO2_test.tp / (DCO2_test.tp + DCO2_test.fp), 3)
DCO2_test['neg_pred_value'] = round(DCO2_test.tn / (DCO2_test.tn + DCO2_test.fn), 3)
DCO2_test['Youden'] = round(DCO2_test.sensitivity + DCO2_test.specificity - 1, 3)
In [ ]:
DCO2_test[DCO2_test.Youden == DCO2_test.Youden.max()];
In [ ]:
DCO2_test['1-specificity'] = 1 - DCO2_test['specificity']
In [ ]:
DCO2_test.sort_values('1-specificity', inplace = True)
In [ ]:
DCO2_test;
In [ ]:
DCO2_test['1-specificity'].iloc[30];
In [ ]:
DCO2_test['sensitivity'].iloc[30];
In [ ]:
AUC = 0 # Area under the ROC curve
for i in range(len(DCO2_test['1-specificity'])-2):
c_high = ((DCO2_test['1-specificity'].iloc[i+1] - DCO2_test['1-specificity'].iloc[i]) *
DCO2_test['sensitivity'].iloc[i+1])
c_low = ((DCO2_test['1-specificity'].iloc[i+1] - DCO2_test['1-specificity'].iloc[i]) *
DCO2_test['sensitivity'].iloc[i])
c = (c_high + c_low) / 2
AUC += c
In [ ]:
s = DCO2_corr_stats_gases_all[interval, offset]
s = s[s['mean'] > 60]
In [ ]:
len(s);
In [ ]:
s[s['pCO2, POC'] > 8];
In [ ]:
s['recording'].unique();
In [ ]:
len(s['recording'].unique());
In [ ]:
s = DCO2_corr_stats_gases_all[interval, offset]
s = s.ix[s['Blood specimen type, POC'] == 'Arteri...']
s = s[s['mean'] > 60]
In [ ]:
len(s);
In [ ]:
s[s['pCO2, POC'] > 8];
In [ ]:
s['recording'].unique();
In [ ]:
len(s['recording'].unique());
In [ ]:
recordings = [ 'DG005_1', 'DG005_2', 'DG005_3', 'DG006_1', 'DG009', 'DG016', 'DG017', 'DG018_1',
'DG020', 'DG022', 'DG025', 'DG027', 'DG032_2', 'DG038_1', 'DG038_2', 'DG040_1', 'DG040_2', 'DG046_1']
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
total = DCO2_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
x = sample['mean']
y = sample['pCO2, POC']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i] , marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$ (mL$\rm^2$/sec)', fontsize = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xlim([0,600])
plt.title(r'Uncorrected DCO$ \rm _2$ - all gases' , fontsize = 14);
a = DCO2_stats_gases_all[interval, offset]['mean']
b = DCO2_stats_gases_all[interval, offset]['pCO2, POC']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)' % (result[0], result[1], r, lcl, ucl)
plt.text(275, b.max()-2, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});
fig.savefig('%s/Figure_1_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
x = DCO2_stats_gases_all[interval, offset]['mean']
y = DCO2_corr_stats_gases_all[interval, offset]['mean']
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
ax1 = fig.add_subplot(1, 1, 1);
bp = plt.boxplot([x, y] )
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['medians'], color='red')
plt.setp(bp['fliers'], color='black', marker='+')
plt.ylabel("Units", fontsize = 14)
plt.xlabel("", fontsize = 14)
plt.xticks([1,2], [r'DCO$ \rm _2$ (mL$\rm^2$/sec)', r'DCO$ \rm _2$ (mL$\rm^2$/sec/kg$^2$)'], size = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.title(r'Boxplots of all DCO$ \rm _2$ data' , fontsize = 14);
# calculate p values
t_stats, p = ttest_rel(x, y)
ax1.text(ax1.get_xlim()[1]-1.2, 550, 'p<0.001', color = 'red', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'None', 'alpha':1, 'pad':10});
fig.savefig('%s/Figure_2_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
for i, recording in enumerate(recordings):
total = DCO2_corr_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
x = sample['mean']
y = sample['pCO2, POC']
fig.add_subplot(1,1,1);
ax = plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.axvline(x= 50 , linewidth=1, color = 'black')
plt.axhline(y= 8, linewidth=1, color = 'black')
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$) - all gases', fontsize = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - all gases' , fontsize = 14)
a = DCO2_corr_stats_gases_all[interval, offset]['mean']
b = DCO2_corr_stats_gases_all[interval, offset]['pCO2, POC']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a), '--', color = 'red', linewidth = 2)
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.47, b.max() * 0.85, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10})
fig.savefig('%s/Figure_3_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
# ROC curve
# A corrected DCO2 > 50 ml/min/kg2 predict a pCO2 < 8 kPa with a senstiivty of 0.390 and a specificity of 0.825
# (Youden score = 0.215)
x = [0, 1]; y = [0, 1]
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.add_subplot(1,1,1)
text = r'DCO$ \rm _2$corr = 50'
text2 = 'AUC = %.3f' % AUC
plt.plot(1 - DCO2_test.specificity , DCO2_test.sensitivity, c = 'black')
ax = plt.plot(x, y, c = 'black', linestyle = '--', )
plt.vlines(1 - 0.824561, 1 - 0.824561, 0.390, color='k', linewidth = 3)
plt.text(0.05, 0.60, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});
plt.text(0.6, 0.3, text2, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});
plt.title('ROC curve', size = 14)
plt.ylabel('Sensitivity', fontsize = 14)
plt.xlabel('1 - Specificity', fontsize = 14)
plt.grid()
fig.savefig('%s/Figure_4.tiff' % dir_write , dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
subset = DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] < 10]
for i, recording in enumerate(recordings):
total = subset
sample = total.ix[total['recording'] == recording]
x = sample['DCO2_corr']
y = sample['pCO2']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize = 14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - leak < 10%' , fontsize = 14)
a = total['DCO2_corr']
b = total['pCO2']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'r--')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
fig.savefig('%s/Figure_5_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
fig.suptitle('E-Figure 1', size = 14)
for i, recording in enumerate(recordings):
total = DCO2_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
sample_art = sample.ix[sample['Blood specimen type, POC'] == 'Arteri...']
x = sample_art['mean']
y = sample_art['pCO2, POC']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$ (mL$\rm^2$/sec)', fontsize = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xlim([0,600])
plt.title(r'Uncorrected DCO$ \rm _2$ - arterial gases only' , fontsize = 14);
sample2 = total.ix[total['Blood specimen type, POC'] == 'Arteri...']
a = sample2['mean']
b = sample2['pCO2, POC']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.48, b.max()-2, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'black', 'alpha':1, 'pad':10});
fig.savefig('%s/E_Figure_1_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 2', size = 14)
for i, recording in enumerate(recordings):
total = DCO2_corr_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
sample_art = sample.ix[sample['Blood specimen type, POC'] == 'Arteri...']
x = sample_art['mean']
y = sample_art['pCO2, POC']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.axvline(x= 60 , linewidth=1, color = 'black')
plt.axhline(y= 8, linewidth=1, color = 'black')
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize = 14)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - arterial gases only' , fontsize = 14)
sample2 = total.ix[total['Blood specimen type, POC'] == 'Arteri...']
a = sample2['mean']
b = sample2['pCO2, POC']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a), '--', color = 'red', linewidth = 2)
# Calculate pearson's correlation coefficient with confidence intervals, coefficient of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(a.max()*0.48, b.max()*0.83, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10});
fig.savefig('%s/E_figure_2_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
For Supplementary Figure 3 of the paper please see the functions creating the individual graphs above.
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 4', size = 14)
subset = DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] < 10]
for i, recording in enumerate(recordings):
total = subset
sample = total.ix[total['recording'] == recording]
sample_art = sample.ix[subset['specimen'] == 'Arteri...']
x = sample_art['DCO2_corr']
y = sample_art['pCO2']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize = 14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - leak<10%' , fontsize = 14)
sample2 = total.ix[total['specimen'] == 'Arteri...']
a = sample2['DCO2_corr']
b = sample2['pCO2']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
fig.savefig('%s/E_figure_4_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 5', size = 14)
subset = DCO2_stats_gases_leak_all[10,2][DCO2_stats_gases_leak_all[10,2]['leak%'] >= 10]
for i, recording in enumerate(recordings):
total = subset
sample = total.ix[total['recording'] == recording]
sample_art = sample.ix[subset['specimen'] == 'Arteri...']
x = sample_art['DCO2_corr']
y = sample_art['pCO2']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'DCO$ \rm _2$_corr (mL$\rm^2$/sec/kg$^2$)', fontsize = 14)
plt.xlim([0,200])
plt.title(r'Weight-corrected DCO$ \rm _2$ - leak>10%' , fontsize = 14)
sample2 = total.ix[total['specimen'] == 'Arteri...']
a = sample2['DCO2_corr']
b = sample2['pCO2']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np=%.3f' % (result[0], result[1], r, lcl, ucl, p)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
fig.savefig('%s/E_figure_5_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 6', size = 14)
for i, recording in enumerate(recordings):
total = VThf_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
x = sample['mean']
y = sample['pCO2, POC']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'VThf (mL/kg)', fontsize = 14)
plt.xlim([0,5])
# plt.title(r'Tidal volume' , fontsize = 14)
a = VThf_stats_gases_all[interval, offset]['mean']
b = VThf_stats_gases_all[interval, offset]['pCO2, POC']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.52, b.max()*0.84, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
fig.savefig('%s/E_figure_6_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()
In [ ]:
fig = plt.figure()
fig.set_size_inches(7, 5)
fig.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.7)
fig.suptitle('E-Figure 7', size = 14)
for i, recording in enumerate(recordings):
total = VThf_sq_stats_gases_all[interval, offset]
sample = total.ix[total['recording'] == recording]
x = sample['mean']
y = sample['pCO2, POC']
fig.add_subplot(1,1,1);
plt.scatter(x , y, color = colors[i], marker = markers[i], s = 25)
plt.ylabel(r'pCO$ \rm _2$ (kPa)', fontsize = 14)
plt.xlabel(r'VThf (mL$^2$/kg$^2$)', fontsize = 14)
plt.xlabel(r'VThf$^2$ (mL$^2$/kg$^2$)', fontsize = 14)
a = VThf_sq_stats_gases_all[interval, offset]['mean']
b = VThf_sq_stats_gases_all[interval, offset]['pCO2, POC']
# Polynomial Coefficients
coeffs = np.polyfit(a, b, deg = 1)
result = coeffs.tolist()
# Fit a trendline
l = np.poly1d(coeffs)
plt.plot(a,l(a),'--', color = 'red')
# Calculate pearson's correlation coefficient with confidence intervals, coefficiet of determination and p value
r , lcl, ucl , r2, p = correl(a,b)
# print the equation on the graph area
text = 'y=%.4fx+(%.4f)\nr=%.4f (%.4f , %.4f)\np<0.001' % (result[0], result[1], r, lcl, ucl)
plt.text(a.max()*0.43, b.max()*0.83, text, color = 'black', style='normal', fontsize=14,
bbox={'facecolor':'white', 'edgecolor':'red', 'alpha':1, 'pad':10})
fig.savefig('%s/E_Figure_7_color.tiff' % dir_write, dpi = 300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format = 'tiff',
transparent=False, bbox_inches=None, pad_inches=0.1, frameon=True)
plt.close()