ROP Data Analysis 0.1

Call ROPtimes dict, and clean data up


In [90]:
# Import Modules and Identify Low Signals
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd
from datetime import datetime, timedelta
from pandas import Series, DataFrame, concat
import scipy
import scipy.stats
import scipy.fftpack

#Define any string with 'C' as NaN. 'C' is an indicator for the Masimo as a low signal reading
def readD(val):
    if 'C' in val:
        return np.nan
    return val

%matplotlib inline

In [91]:
import ROPtimes as rt

Baby = 'ROP015' #change depending on which baby you're doing!
BabyDict = getattr(rt, Baby)

In [92]:
dfNIRSpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/NIRS/Clean/' + Baby + 'NIRS.csv',
                         parse_dates={'timestamp': ['Date',' Time']},
                         index_col='timestamp',
                         usecols=['Date', ' Time', ' Ch2 %StO2'],
                         na_values=['0'])

dfPOpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
                       parse_dates={'timestamp': ['Date','Time']},
                       index_col='timestamp',
                       usecols=['Date', 'Time', 'SpO2', 'PR'],
                       na_values=['0'])

dfPIpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
                       parse_dates={'timestamp': ['Date','Time']},
                       index_col='timestamp',
                       usecols=['Date', 'Time', 'PI', 'Exceptions'],
                       na_values=['0'],
                       converters={'Exceptions':  readD}
                       ) #make a PI df as well to clean it better

dfNIRSpre = dfNIRSpre0.rename(columns={' Ch2 %StO2': 'StO2'}) #rename NIRS column

#Clean the PI dataframe to get rid of rows that have NaN
dfPIpre = dfPIpre0[dfPIpre0.loc[:, ['PI', 'Exceptions']].apply(pd.notnull).all(1)]

dfPOpre = dfPIpre.combine_first(dfPOpre0)
#combine PI dataframe with the rest of the Pulse Ox dataframe after cleaning

'''
Parse_dates tells the read_csv function to combine the date and time column
Into one timestamp column and parse it as a timestamp.
Pandas is smart enough to know how to parse a date in various formats

Index_col sets the timestamp column to be the index.

Usecols tells the read_csv function to select only the subset of the columns.
Na_values is used to turn 0 into NaN

Converters: readD is the dict that means any string with 'C' with be NaN (for PI)
'''


Out[92]:
"\nParse_dates tells the read_csv function to combine the date and time column\nInto one timestamp column and parse it as a timestamp.\nPandas is smart enough to know how to parse a date in various formats\n\nIndex_col sets the timestamp column to be the index.\n\nUsecols tells the read_csv function to select only the subset of the columns.\nNa_values is used to turn 0 into NaN\n\nConverters: readD is the dict that means any string with 'C' with be NaN (for PI)\n"

In [93]:
# Phone almost always equals Philips monitor

# NIRS date/time is 2 mins and 10 seconds slower than phone. Have to correct for it.
# Pulse ox date/time is 1 mins and 32 seconds faster than phone. Have to correct for it.

#This code is to make the combined dataframe come in even seconds.
#The corrected is 1 second, + for NIRS and - for PO.

TCcorrect = timedelta(seconds=1)

ncorr = dfNIRSpre.index+TCcorrect
pcorr = dfPOpre.index-TCcorrect


if dfNIRSpre.index[:1].second % 2 == 0:
    if dfPOpre.index[:1].second % 2 == 0:
        print '\nBoth NIRS and PO indices are even.'
    elif dfPOpre.index[:1].second % 2 != 0:
        print '\nNIRS even, PO odd. PO index corrected.'
        dfPOpre = dfPOpre.set_index(pcorr)
    else:
        raise NameError('Indices are messed up')
elif dfNIRSpre.index[:1].second % 2 != 0:
    if dfPOpre.index[:1].second % 2 == 0:
        print '\nNIRS odd, PO even. NIRS index corrected.'
        dfNIRSpre = dfNIRSpre.set_index(ncorr)
    elif dfPOpre.index[:1].second % 2 != 0:
        print '\nBoth NIRS and PO indices are odd. Both corrected'
        dfNIRSpre = dfNIRSpre.set_index(ncorr)
        dfPOpre = dfPOpre.set_index(pcorr)
else:
    raise NameError('Indices are messed up')

TCnirs = timedelta(minutes=2, seconds=10)
TCpo = timedelta(minutes=1, seconds=32)
# NIRS is slower than correct time, need to add TCnirs to catch it up
# PO is faster than correct time, need to subtract TCpo to slow it down

dfNIRS = dfNIRSpre.set_index([dfNIRSpre.index+TCnirs]) #NIRS Time Correction
dfPO = dfPOpre.set_index([dfPOpre.index-TCpo]) #PO Time Correction


Both NIRS and PO indices are even.

In [94]:
#for babies that only had one machine

dffakePO = pd.DataFrame({'SpO2':0, 'PR':0, 'PI':0}, index=dfNIRS.index)
dffakeNIRS = pd.DataFrame({'StO2':0}, index=dfPO.index)

if len(dfNIRS) > 5:
    if len(dfPO) > 5:
        df = dfNIRS.combine_first(dfPO) #Combine two DataFrame objects and default to non-null values in frame
        print 'Both machines on'
    elif len(dfPO) < 5:
        df = dfNIRS.combine_first(dffakePO)
        print 'Only NIRS on'
elif len(dfNIRS) < 5:
    df = dffakeNIRS.combine_first(dfPO)
    print 'Only Masimo on'
else:
    raise NameError('Check your files')


Both machines on

In [95]:
a = -(df.index[0]-BabyDict['ExamStart']).total_seconds()*0.000277778
print 'Total (h) Baseline recording = %s' % a
b = ((df.index[-1])-BabyDict['ExamEnd']).total_seconds()*0.000277778
print 'Total (h) After Exam recording = %s' % b


Total (h) Baseline recording = 15.748345932
Total (h) After Exam recording = 25.46390926

In [96]:
#Convert to Relative Time and Time Windows
def reltimecalc(ROPNumber, df):
    ROPreltime = {
        'BaselineStart' : (df.index[0].to_datetime()-ROPNumber['ExamStart']).total_seconds(), #should be negative
        'BaselineEnd': (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #redundant, just for labeling sake
        'EyeDrop1' : (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #neg
        'EyeDrop2' : (ROPNumber['EyeDrop2'] - ROPNumber['ExamStart']).total_seconds(), #neg
        'EyeDrop3' : (ROPNumber['EyeDrop3'] - ROPNumber['ExamStart']).total_seconds(), #neg
        'ExamStart' : (ROPNumber['ExamStart'] - ROPNumber['ExamStart']).total_seconds(), 
        'ExamEnd' :  (ROPNumber['ExamEnd'] - ROPNumber['ExamStart']).total_seconds(), #positive number
        'DataEnd' : (df.index[-1].to_datetime()-ROPNumber['ExamStart']).total_seconds()
    }
    
    TimeWindows = {
        'tw0' : -10800.0, # three hours before 0
        'tw1' : 10800.0, # three hours after 0
        'tw2' : 21600.0, # six hoours
        'tw3' : 32400.0, # nine hours
        'tw4' : 43200.0, # 12 hours
        'tw5' : 54000.0, # 15 hours
        'tw6' : 64800.0, # 18 hours
        'tw7' : 75600.0, # 21 hours
        'tw8' : 86400.0 #24 hours
    }
    
    return ROPreltime, TimeWindows

In [97]:
tw = timedelta(hours=3)
twend = timedelta(hours=24)
df = df[(BabyDict['ExamStart']-tw):(BabyDict['ExamStart']+twend)]

In [98]:
Data, tw = reltimecalc(BabyDict, df)

In [99]:
df = df.set_index(np.linspace(tw['tw0'],tw['tw8'],len(df)))

In [100]:
df = df[df.PI.between(0.02, 20)]
df = df.drop('Exceptions', 1) # Drop exceptions now that you don't need it anymore
#filter PI values to only allow  =< 0.02 and >= 20

dfen = df.dropna(how='any') #df for no NaN

In [101]:
psa = dfen['SpO2'].values
prs = dfen['StO2'].values

FTOE = pd.Series(((psa-prs)/psa)*100)

dfen = dfen.assign(FTOE=FTOE.values)

Analyis Start

Linear Measures:

Avgs, Std Dev, and Coefficient of Variation

- Avg, std dev, and CoV for every time window
- Avg every 10 sec for 4 mins of ROP exam, std dev, CoV.
- O2 DeSat Counts

In [102]:
a = df[tw['tw0']:0]
b = df[0:tw['tw1']]
c = df[tw['tw1']:tw['tw2']]
d = df[tw['tw2']:tw['tw3']]
e = df[tw['tw3']:tw['tw4']]
f = df[tw['tw4']:tw['tw5']]
g = df[tw['tw5']:tw['tw6']]
h = df[tw['tw6']:tw['tw7']]
i = df[tw['tw7']:tw['tw8']]

dflst = [a, b, c, d, e, f, g, h, i]

tlst = [tw['tw0'], 0, tw['tw1'], tw['tw2'], tw['tw3'], tw['tw4'], tw['tw5'], 
        tw['tw6'], tw['tw7'], tw['tw8']]

#dfen for no na values

ea = dfen[tw['tw0']:0]
eb = dfen[0:tw['tw1']]
ec = dfen[tw['tw1']:tw['tw2']]
ed = dfen[tw['tw2']:tw['tw3']]
ee = dfen[tw['tw3']:tw['tw4']]
ef = dfen[tw['tw4']:tw['tw5']]
eg = dfen[tw['tw5']:tw['tw6']]
eh = dfen[tw['tw6']:tw['tw7']]
ei = dfen[tw['tw7']:tw['tw8']]

dfenlst = [ea, eb, ec, ed, ee, ef, eg, eh, ei] #basically same dataframe but all NaNs dropped
#some functions will return NaN if there's even one NaN present so this is why this is necessary

#entropy functions replace original data, so need to make a copy
dfenlstcopy1 = [] #use this for entropy measures
dfenlstcopy2 = [] #use this for cross entropy meaures
dfenlstcopy3 = [] #use this for baseline cross entropy measures
for i in dfenlst:
    cpy = i.copy(deep=True)
    dfenlstcopy1.append(cpy)
    dfenlstcopy2.append(cpy)
    dfenlstcopy3.append(cpy)

In [103]:
#AVERAGE DURING ROP EXAM FOR FIRST FOUR MINUTES


#tensec needs df['y']

def tensec(a, b): #a = baseline values, b = next time epoch

    blavg = np.nanmean(a.values)
    blstd = np.nanstd(a.values)
    blcv = scipy.stats.variation(a.values, nan_policy='omit')
    
    r =  np.arange(0, 250, 10)
    
    avg = [blavg]
    stdev = [blstd]
    cv = [blcv]
    
    ind = list(np.arange(0, 250, 10))
    ind.insert(0, 'Baseline')
    
    for i in r:
        x = b[i:(i+10)].mean()
        y = b[i:(i+10)].std()
        z = scipy.stats.variation((b[i:(i+10)]).values, nan_policy='omit')
        
        avg.append(x)
        stdev.append(y)
        cv.append(z)
    
    dftensec = pd.DataFrame({
    'a_mean': avg,
    'b_std dev' : stdev,
    'c_CoV' : cv,
    }, index = ind)
    
    return dftensec

In [104]:
PIten = tensec(a['PI'], b['PI'])
PIten.columns = ['PI mean', 'PI std dev', 'PI CV']

PRten = tensec(a['PR'], b['PR'])
PRten.columns = ['PR mean', 'PR std dev', 'PR CV']

SpO2ten = tensec(a['SpO2'], b['SpO2'])
SpO2ten.columns = ['SpO2 mean', 'SpO2 std dev', 'PR CV']

StO2ten = tensec(a['StO2'], b['StO2'])
StO2ten.columns = ["StO2 mean", "StO2 std dev", "StO2 CV"]

FTOEten = tensec(ea['FTOE'], eb['FTOE']) #e means dropped nan
FTOEten.columns = ['FTOE mean', 'FTOE std dev', 'FTOE CV']

In [105]:
def linearmeasures(avglst): 
    
    #mean
    
    PImean = []
    PRmean = []
    SpO2mean = []
    StO2mean = []
    FTOEmean = []
    
    #std dev
    
    PIstd = []
    PRstd = []
    SpO2std = []
    StO2std = []
    FTOEstd = []


    #coefficient of varience
    
    PIcv = []
    PRcv = []
    SpO2cv = []
    StO2cv = []
    FTOEcv = []
   
    
    for i in avglst:

        ax = np.nanmean(i['PI'].values) #mean
        PImean.append(ax)
        az = np.nanstd(i['PI'].values) #std dev
        PIstd.append(az)
        av = scipy.stats.variation(i['PI'].values, nan_policy='omit') #CoV
        PIcv.append(av)
        
        bx = np.nanmean(i['PR'].values) #mean
        PRmean.append(bx)
        bz = np.nanstd(i['PR'].values) #std dev
        PRstd.append(bz)
        bv = scipy.stats.variation(i['PR'].values, nan_policy='omit') #CoV
        PRcv.append(bv)
            
        cx = np.nanmean(i['SpO2'].values) #mean
        SpO2mean.append(cx)
        cz = np.nanstd(i['SpO2'].values) #std dev
        SpO2std.append(cz)
        cv = scipy.stats.variation(i['SpO2'].values, nan_policy='omit') #CoV
        SpO2cv.append(cv)
            
        dx = np.nanmean(i['StO2'].values) #mean
        StO2mean.append(dx)
        dz = np.nanstd(i['StO2'].values) #std dev
        StO2std.append(dz)
        dv = scipy.stats.variation(i['StO2'].values, nan_policy='omit') #CoV
        StO2cv.append(dv)
        
        ex = np.nanmean(i['FTOE'].values) #mean
        FTOEmean.append(ex)
        ez = np.nanstd(i['FTOE'].values) #std dev
        FTOEstd.append(ez)
        ev = scipy.stats.variation(i['FTOE'].values, nan_policy='omit') #CoV
        FTOEcv.append(ev)
            
    
    PIlin = pd.DataFrame({
    'a_PI mean': PImean,
    'b_PI std dev' : PIstd,
    'c_PI CoV' : PIcv,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    PRlin = pd.DataFrame({
    'a_PR mean': PRmean,
    'b_PR std dev' : PRstd,
    'c_PR CoV' : PRcv,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    SpO2lin = pd.DataFrame({
    'a_SpO2 mean': SpO2mean,
    'b_SpO2 std dev' : SpO2std,
    'c_SpO2 CoV' : SpO2cv,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    StO2lin = pd.DataFrame({
    'a_StO2 mean': StO2mean,
    'b_StO2 std dev' : StO2std,
    'c_StO2 CoV' : StO2cv,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    FTOElin = pd.DataFrame({
    'a_FTOE mean': FTOEmean,
    'b_FTOE std dev' : FTOEstd,
    'c_FTOE CoV' : FTOEcv,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    dfCoV = pd.DataFrame({
    'PI CoV' : PIcv,
    'PR CoV' : PRcv,
    'SpO2 CoV' : SpO2cv,
    'StO2 CoV' : StO2cv,
    'FTOE CoV' : FTOEcv
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    
    return PIlin, PRlin, SpO2lin, StO2lin, FTOElin, dfCoV

In [106]:
PIlin, PRlin, SpO2lin, StO2lin, FTOElin, dfCoV = linearmeasures(dfenlst) #USE dfenlst

In [107]:
from IPython.display import display
display(PIlin)
display(PRlin)
display(SpO2lin)
display(StO2lin)
display(FTOElin)


a_PI mean b_PI std dev c_PI CoV
-3:0 1.400518 0.607776 0.433965
0:3 1.470180 0.408768 0.278039
3:6 1.328343 0.666908 0.502060
6:9 1.516548 0.507242 0.334471
9:12 1.880443 0.617186 0.328213
12:15 2.044417 0.810196 0.396297
15:18 1.722643 0.592473 0.343933
18:21 1.780041 0.529919 0.297700
21:24 1.238639 0.605162 0.488570
a_PR mean b_PR std dev c_PR CoV
-3:0 147.459532 15.811006 0.107223
0:3 148.137215 17.468538 0.117921
3:6 146.487619 15.656282 0.106878
6:9 154.990661 15.570040 0.100458
9:12 150.472088 14.765096 0.098125
12:15 164.673462 16.413125 0.099671
15:18 151.246552 14.890699 0.098453
18:21 156.654985 14.957723 0.095482
21:24 153.593688 17.143984 0.111619
a_SpO2 mean b_SpO2 std dev c_SpO2 CoV
-3:0 98.768316 1.969275 0.019938
0:3 97.741483 2.303449 0.023567
3:6 98.665524 1.748614 0.017723
6:9 98.020172 1.972344 0.020122
9:12 98.063082 1.734714 0.017690
12:15 98.151674 2.144752 0.021851
15:18 97.956578 2.278130 0.023257
18:21 97.970015 1.944758 0.019851
21:24 98.817949 1.833986 0.018559
a_StO2 mean b_StO2 std dev c_StO2 CoV
-3:0 78.951285 4.364783 0.055285
0:3 79.807937 4.488576 0.056242
3:6 78.421524 3.891876 0.049628
6:9 78.064625 3.017061 0.038648
9:12 79.340901 3.176529 0.040036
12:15 78.302226 3.366731 0.042997
15:18 81.875885 3.955571 0.048312
18:21 77.831709 3.228982 0.041487
21:24 81.162525 5.011192 0.061743
a_FTOE mean b_FTOE std dev c_FTOE CoV
-3:0 20.027616 4.778722 0.238607
0:3 18.295492 5.087889 0.278095
3:6 20.489832 4.247597 0.207303
6:9 20.323137 3.546801 0.174520
9:12 19.063551 3.603959 0.189050
12:15 20.174427 4.068943 0.201688
15:18 16.356134 4.786585 0.292648
18:21 20.521799 3.698035 0.180200
21:24 17.821530 5.556991 0.311813

In [108]:
dfCoV.plot() #CV graph
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()


Pearson Correlation Coefficients of 3 Hour Epochs with graph to see trend


In [109]:
ar = dfen[tw['tw0']:0].corr()
#excludes NaN values
#pearson correlation coefficeint

#pearson, kendall or spearman?
br = dfen[0:tw['tw1']].corr()
cr = dfen[tw['tw1']:tw['tw2']].corr()
dr = dfen[tw['tw2']:tw['tw3']].corr()
er = dfen[tw['tw3']:tw['tw4']].corr()
fr = dfen[tw['tw4']:tw['tw5']].corr()
gr = dfen[tw['tw5']:tw['tw6']].corr()
hr = dfen[tw['tw6']:tw['tw7']].corr()
ir = dfen[tw['tw7']:tw['tw8']].corr()

dfcorrlst = [ar, br, cr, dr, er, fr, gr, hr, ir]

In [110]:
#      0    1    2     3   4
#      PI, PR, SpO2, StO2, FTOE
# PI   aa  ab    ac    ad  ae
# PR   ba  bb    bc    bd  be
# SpO2 ca  cb    cc    cd  ce
# StO2 da  db    dc    dd  de
# FTOE ea  eb    ec    ed  ee

def corrlst(dflst):
    
    a = [] #ab
    b = [] #ac
    c = [] #ad
    d = [] #ae
    e = [] #bc
    f = [] #bd
    g = [] #be
    h = [] #cd
    i = [] #ce
    j = [] #de
    
    for x in dflst:

        ab = x['PI'][1] #ab
        a.append(ab)
        ac = x['PI'][2] #ac 
        b.append(ac)
        ad = x['PI'][3] #ad
        c.append(ad)
        ae = x['PI'][4] #ae
        d.append(ae)
        bc = x['PR'][2] #bc
        e.append(bc)
        bd = x['PR'][3] #bd
        f.append(bd)
        be = x['PR'][4] #be
        g.append(be)
        cd = x['SpO2'][3] #cd
        h.append(cd)
        ce = x['SpO2'][4] #ce
        i.append(ce)
        de = x['StO2'][4] #de
        j.append(de)
    
    rgraph = pd.DataFrame({
    'PI:PR': a,
    'PI:SpO2' : b,
    'PI:StO2' : c,
    'PI:FTOE' : d,
    'PR:SpO2' : e,
    'PR:StO2' : f,
    'PR:FTOE' : g,
    'SpO2:StO2' : h,
    'SpO2:FTOE' : i,
    'StO2:FTOE': j,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    return rgraph

In [111]:
rgraph = corrlst(dfcorrlst)

In [112]:
PIg = rgraph.ix[:,0:4]
PRg = rgraph[['PI:PR', 'PR:FTOE', 'PR:SpO2', 'PR:StO2']]
SpO2g = rgraph[['PI:SpO2', 'PR:SpO2',  'SpO2:StO2', 'SpO2:FTOE']]
StO2g = rgraph[['PI:StO2', 'PR:StO2', 'SpO2:StO2', 'StO2:FTOE']]
FTOEg = rgraph[['PI:FTOE', 'PR:FTOE', 'SpO2:FTOE', 'StO2:FTOE']]

In [113]:
display(PIg, PRg, SpO2g, StO2g, FTOEg)


PI:FTOE PI:PR PI:SpO2 PI:StO2
-3:0 0.233640 0.013749 -0.118383 -0.298969
0:3 0.183815 -0.195262 -0.153697 -0.275949
3:6 0.095198 0.022819 -0.130706 -0.147530
6:9 0.135511 -0.095433 0.099453 -0.104914
9:12 0.124760 0.292634 -0.182124 -0.218045
12:15 -0.011576 -0.031112 -0.095576 -0.035857
15:18 -0.047382 -0.167565 -0.091544 0.007187
18:21 0.163204 0.013000 -0.089759 -0.228809
21:24 0.009394 -0.074790 0.037989 0.001988
PI:PR PR:FTOE PR:SpO2 PR:StO2
-3:0 0.013749 0.077095 -0.130241 -0.125067
0:3 -0.195262 0.357436 0.024431 -0.383907
3:6 0.022819 0.088692 -0.110004 -0.134817
6:9 -0.095433 0.081715 -0.040494 -0.114552
9:12 0.292634 0.205445 -0.225870 -0.330577
12:15 -0.031112 0.207018 -0.062346 -0.280120
15:18 -0.167565 0.181063 -0.059224 -0.246554
18:21 0.013000 0.159951 -0.162107 -0.260643
21:24 -0.074790 -0.080035 -0.206098 0.024120
PI:SpO2 PR:SpO2 SpO2:StO2 SpO2:FTOE
-3:0 -0.118383 -0.130241 -0.005173 0.383524
0:3 -0.153697 0.024431 -0.025551 0.437388
3:6 -0.130706 -0.110004 -0.022274 0.371598
6:9 0.099453 -0.040494 -0.024701 0.497076
9:12 -0.182124 -0.225870 -0.035578 0.445869
12:15 -0.095576 -0.062346 -0.105365 0.548999
15:18 -0.091544 -0.059224 -0.107909 0.539084
18:21 -0.089759 -0.162107 -0.014472 0.460192
21:24 0.037989 -0.206098 -0.153297 0.437165
PI:StO2 PR:StO2 SpO2:StO2 StO2:FTOE
-3:0 -0.298969 -0.125067 -0.005173 -0.924194
0:3 -0.275949 -0.383907 -0.025551 -0.909062
3:6 -0.147530 -0.134817 -0.022274 -0.936017
6:9 -0.104914 -0.114552 -0.024701 -0.879214
9:12 -0.218045 -0.330577 -0.035578 -0.910117
12:15 -0.035857 -0.280120 -0.105365 -0.888480
15:18 0.007187 -0.246554 -0.107909 -0.894428
18:21 -0.228809 -0.260643 -0.014472 -0.893971
21:24 0.001988 0.024120 -0.153297 -0.955420
PI:FTOE PR:FTOE SpO2:FTOE StO2:FTOE
-3:0 0.233640 0.077095 0.383524 -0.924194
0:3 0.183815 0.357436 0.437388 -0.909062
3:6 0.095198 0.088692 0.371598 -0.936017
6:9 0.135511 0.081715 0.497076 -0.879214
9:12 0.124760 0.205445 0.445869 -0.910117
12:15 -0.011576 0.207018 0.548999 -0.888480
15:18 -0.047382 0.181063 0.539084 -0.894428
18:21 0.163204 0.159951 0.460192 -0.893971
21:24 0.009394 -0.080035 0.437165 -0.955420

In [114]:
PIg.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()



In [115]:
PRg.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()



In [116]:
SpO2g.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()



In [117]:
StO2g.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()



In [118]:
FTOEg.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()


Plots comparing baseline and 3H after ROP Exam


In [119]:
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter.
    The Savitzky-Golay filter removes high frequency noise from data.
    It has the advantage of preserving the original shape and
    features of the signal better than other types of filtering
    approaches, such as moving averages techniques.
    
    Parameters
    ----------
    y : array_like, shape (N,)
        the values of the time history of the signal.
    window_size : int
        the length of the window. Must be an odd integer number.
    order : int
        the order of the polynomial used in the filtering.
        Must be less then `window_size` - 1.
    deriv: int
        the order of the derivative to compute (default = 0 means only smoothing)
    
    Returns
    -------
    ys : ndarray, shape (N)
        the smoothed signal (or it's n-th derivative).
    
    Notes
    -----
    The Savitzky-Golay is a type of low-pass filter, particularly
    suited for smoothing noisy data. The main idea behind this
    approach is to make for each point a least-square fit with a
    polynomial of high order over a odd-sized window centered at
    the point.
    
    Examples
    --------
    t = np.linspace(-4, 4, 500)
    y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
    ysg = savitzky_golay(y, window_size=31, order=4)
    import matplotlib.pyplot as plt
    plt.plot(t, y, label='Noisy signal')
    plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
    plt.plot(t, ysg, 'r', label='Filtered signal')
    plt.legend()
    plt.show()
    
    References
    ----------
    .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
       Data by Simplified Least Squares Procedures. Analytical
       Chemistry, 1964, 36 (8), pp 1627-1639.
    .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
       W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
       Cambridge University Press ISBN-13: 9780521880688
    """
    import numpy as np
    from math import factorial

    try:
        window_size = np.abs(np.int(window_size))
        order = np.abs(np.int(order))
    except ValueError, msg:
        raise ValueError("window_size and order have to be of type int")
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")
    order_range = range(order+1)
    half_window = (window_size -1) // 2
    # precompute coefficients
    b = np.mat([[k**i for i in order_range] for k in range(-half_window, half_window+1)])
    m = np.linalg.pinv(b).A[deriv] * rate**deriv * factorial(deriv)
    # pad the signal at the extremes with
    # values taken from the signal itself
    firstvals = y[0] - np.abs( y[1:half_window+1][::-1] - y[0] )
    lastvals = y[-1] + np.abs(y[-half_window-1:-1][::-1] - y[-1])
    y = np.concatenate((firstvals, y, lastvals))
    return np.convolve( m[::-1], y, mode='valid')

In [120]:
ypsg = savitzky_golay(dfen['SpO2'].values, 31, order=4, deriv=0, rate=1)
yrsg = savitzky_golay(dfen['PR'].values, 31, order=4, deriv=0, rate=1)
ytsg = savitzky_golay(dfen['StO2'].values, 31, order=4, deriv=0, rate=1)
yi = dfen['PI'].values #don't filter PI values
yf = dfen['FTOE'].values #don't filter FTOE

dfgraph = pd.DataFrame({'SpO2':ypsg, 'PR':yrsg, 'StO2':ytsg, 'PI':yi, 'FTOE':yf}, index=dfen.index)

In [121]:
fig, axes = plt.subplots(nrows=2, ncols=1)

dfgraph['StO2'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline StO2');
dfgraph['StO2'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam StO2');

plt.tight_layout()
plt.show()



In [122]:
fig, axes = plt.subplots(nrows=2, ncols=1)

dfgraph['SpO2'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline SpO2')
dfgraph['SpO2'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam SpO2');


plt.tight_layout()
plt.show()



In [123]:
fig, axes = plt.subplots(nrows=2, ncols=1)

dfgraph['PR'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline PR');
dfgraph['PR'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam PR');

plt.tight_layout()
plt.show()



In [124]:
fig, axes = plt.subplots(nrows=2, ncols=1)

dfgraph['PI'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline PI');
dfgraph['PI'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('After Exam PI');

plt.tight_layout()
plt.show()



In [125]:
fig, axes = plt.subplots(nrows=2, ncols=1)

dfgraph['FTOE'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline PI');
dfgraph['FTOE'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('After Exam PI');

plt.tight_layout()
plt.show()


Information Theory

- Fuzzy Entropy
- Cross Fuzzy Entropy

In [126]:
from entropy import *

#def fuzzyen(x, dim, r, n, scale=True):
#    return entropy(x, dim, r, n=n, scale=scale, remove_baseline=True)

def FuzzyEnLst(dfenlstcopy1):
    
    PI_FE = []
    PR_FE = []
    SpO2_FE = []
    StO2_FE = []
    FTOE_FE = []
    
    for i in dfenlstcopy1:
        a = fuzzyen(i['PI'].values, 2, .2*np.nanstd(i['PI'].values), n=1)
        b = fuzzyen(i['PR'].values, 2, .2*np.nanstd(i['PR'].values), n=1)
        c = fuzzyen(i['SpO2'].values, 2, .2*np.nanstd(i['SpO2'].values), n=1)
        d = fuzzyen(i['StO2'].values, 2, .2*np.nanstd(i['StO2'].values), n=1)
        e = fuzzyen(i['FTOE'].values, 2, .2*np.nanstd(i['FTOE'].values), n=1)

        PI_FE.append(a)
        PR_FE.append(b)
        SpO2_FE.append(c)
        StO2_FE.append(d)
        FTOE_FE.append(e)
    
    FuzzyEnDf = pd.DataFrame({
    'PI FuzzyEn': PI_FE,
    'PR FuzzyEn' : PR_FE,
    'SpO2 FuzzyEn' : SpO2_FE,
    'StO2 FuzzyEn' : StO2_FE,
    'FTOE FuzzyEn' : FTOE_FE
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    return FuzzyEnDf

In [127]:
FuzzyEnDf = FuzzyEnLst(dfenlstcopy1)

In [ ]:
FuzzyEnDf['PI FuzzyEn'].plot(grid=True)

In [ ]:
FuzzyEnDf['PR FuzzyEn'].plot(grid=True)

In [ ]:
FuzzyEnDf['SpO2 FuzzyEn'].plot(grid=True)

In [ ]:
FuzzyEnDf['StO2 FuzzyEn'].plot(grid=True)

In [ ]:
FuzzyEnDf['FTOE FuzzyEn'].plot(grid=True)

In [ ]:
FuzzyEnDf.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

In [ ]:
#def cross_fuzzyen(x1, x2, m, r, n, scale=True):
#    return entropy([x1, x2], dim, r, n, scale=scale, remove_baseline=True)

def CrossFuzzyLst(dfenlstcopy2):
    
#Cross Fuzzy Entropy between variables

    a = [] #ab
    b = [] #ac
    c = [] #ad
    d = [] #ae
    e = [] #bc
    f = [] #bd
    g = [] #be
    h = [] #cd
    i = [] #ce
    j = [] #de
    
    rlst = [a, b, c, d, e, f, g, h, i, j]
    
    #      0    1    2     3   4
    #      PI, PR, SpO2, StO2, FTOE
    # PI   aa  ab    ac    ad  ae
    # PR   ba  bb    bc    bd  be
    # SpO2 ca  cb    cc    cd  ce
    # StO2 da  db    dc    dd  de
    # FTOE ea  eb    ec    ed  ee
    
    for x in dfenlstcopy2:

        ab = cross_fuzzyen(x['PI'].values, x['PR'].values, 2, .2, n=1) #ab
        a.append(ab)
        ac = cross_fuzzyen(x['PI'].values, x['SpO2'].values, 2, .2, n=1) #ac
        b.append(ac)
        ad = cross_fuzzyen(x['PI'].values, x['StO2'].values, 2, .2, n=1) #ad
        c.append(ad)
        ae = cross_fuzzyen(x['PI'].values, x['FTOE'].values, 2, .2, n=1) #ae
        d.append(ae)
        bc = cross_fuzzyen(x['PR'].values, x['SpO2'].values, 2, .2, n=1) #bc
        e.append(bc)
        bd = cross_fuzzyen(x['PR'].values, x['StO2'].values, 2, .2, n=1) #bd
        f.append(bd)
        be = cross_fuzzyen(x['PR'].values, x['FTOE'].values, 2, .2, n=1) #be
        g.append(be)
        cd = cross_fuzzyen(x['SpO2'].values, x['StO2'].values, 2, .2, n=1) #cd
        h.append(cd)
        ce = cross_fuzzyen(x['SpO2'].values, x['FTOE'].values, 2, .2, n=1) #ce
        i.append(ce)
        de = cross_fuzzyen(x['StO2'].values, x['FTOE'].values, 2, .2, n=1) #de
        j.append(de)
    
    XFuzzyEnDf = pd.DataFrame({
    'xf PI:PR': a,
    'xf PI:SpO2' : b,
    'xf PI:StO2' : c,
    'xf PI:FTOE' : d,
    'xf PR:SpO2' : e,
    'xf PR:StO2' : f,
    'xf PR:FTOE' : g,
    'xf SpO2:StO2' : h,
    'xf SpO2:FTOE' : i,
    'xf StO2:FTOE': j,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    return XFuzzyEnDf

In [ ]:
xengraph = CrossFuzzyLst(dfenlstcopy2) #error up to 4 decimal places

In [ ]:
PIxg = xengraph.ix[:,0:4]
PRxg = xengraph[['xf PI:PR', 'xf PR:FTOE', 'xf PR:SpO2', 'xf PR:StO2']]
SpO2xg = xengraph[['xf PI:SpO2', 'xf PR:SpO2',  'xf SpO2:StO2', 'xf SpO2:FTOE']]
StO2xg = xengraph[['xf PI:StO2', 'xf PR:StO2', 'xf SpO2:StO2', 'xf StO2:FTOE']]
FTOExg = xengraph[['xf PI:FTOE', 'xf PR:FTOE', 'xf SpO2:FTOE', 'xf StO2:FTOE']]

In [ ]:
display(PIxg, PRxg, SpO2xg, StO2xg, FTOExg)

In [ ]:
PIxg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

In [ ]:
PRxg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

In [ ]:
SpO2xg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

In [ ]:
StO2xg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

In [ ]:
FTOExg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

In [ ]:
def OrderDetn(x1, x2):
    
    x1
    
    #Takes the length of two arrays and
    #arranges them from least to greatest
    #return (smaller x, bigger x)
    if len(x1) <= len(x2):
        return x1, x2
    else:
        return x2, x1

In [ ]:
#def cross_fuzzyen(x1, x2, m, r, n, scale=True):
#    return entropy([x1, x2], dim, r, n, scale=scale, remove_baseline=True)

def CrossFuzzyLst(dfenlstcopy3):
    
    #cross entropy of baseline across different time epochs
    #comparison across the same variable
    
    #len(x1) <= len(x2) for it to work

    PIxbl = []
    PRxbl = []
    SpO2xbl = []
    StO2xbl = []
    FTOExbl = []
    
    for i in np.arange(0, 9): #9 time epochs
    
        a1, a2 = OrderDetn(dfenlstcopy3[0]['PI'].values, dfenlstcopy3[i]['PI'].values)
        b1, b2 = OrderDetn(dfenlstcopy3[0]['PR'].values, dfenlstcopy3[i]['PR'].values)
        c1, c2 = OrderDetn(dfenlstcopy3[0]['SpO2'].values, dfenlstcopy3[i]['SpO2'].values)
        d1, d2 = OrderDetn(dfenlstcopy3[0]['StO2'].values, dfenlstcopy3[i]['StO2'].values)
        e1, e2 = OrderDetn(dfenlstcopy3[0]['FTOE'].values, dfenlstcopy3[i]['FTOE'].values)
    
        a = cross_fuzzyen(a1, a2, 2, .2, n=1)
        b = cross_fuzzyen(b1, b2, 2, .2, n=1)
        c = cross_fuzzyen(c1, c2, 2, .2, n=1) 
        d = cross_fuzzyen(d1, d2, 2, .2, n=1)
        e = cross_fuzzyen(e1, e2, 2, .2, n=1)
    
        PIxbl.append(a)
        PRxbl.append(b)
        SpO2xbl.append(c)
        StO2xbl.append(d)
        FTOExbl.append(e)

    XblFuzzyEnDf = pd.DataFrame({
    'BL X FuzzyEn PI': PIxbl,
    'BL X FuzzyEn PR' : PRxbl,
    'BL X FuzzyEn SpO2' : SpO2xbl,
    'BL X FuzzyEn StO2' : StO2xbl,
    'BL X FuzzyEn FTOE' : FTOExbl,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    return XblFuzzyEnDf

In [ ]:
xblengraph = CrossFuzzyLst(dfenlstcopy3)

In [ ]:
xblengraph

In [ ]:
xblengraph.plot(grid=True)
plt.title('Cross Fuzzy Entropy', color='black')
plt.tight_layout()
plt.axvline(1, color='y', linestyle=':', lw='4') #vertical line at ROP Exam time
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()

Chaos Theory

-Lempel-Ziv complexity
-Lyapunov largest exponent
-Hurst exponent

In [ ]:
# Scripts for Chaos Theory based variables
# Import from other scientists
#

# #1
from analysis import *
    #Source: https://github.com/thelahunginjeet/pydynet/blob/master/analysis.py
    #def lz_complexity(s):
    #Aboy 2006: turn into two bit data based off threshold (threshold = median)
    #turn data into a string of 0's and 1's

# #2
def LLEcal(data):
    #Source: http://systems-sciences.uni-graz.at/etextbook/sw2/lyapunov.html
    result = []
    lambdas = []
    
    # loop through data
    for x in data:
        #take the log of the absolute of the data
        result.append(np.log(abs(x))) #np.log = ln
        # take average
        lambdas.append(mean(result))
    
    return max(lambdas)


# #4
def hurst_mod(data):
    #Source:  http://pyeeg.sourceforge.net/index.html?highlight=hurst#pyeeg.hurst
    #modified lstsq line to to start from third iterable
    X = data
    N = len(X)

    T = array([float(i) for i in xrange(1,N+1)])
    Y = cumsum(X)
    Ave_T = Y/T

    S_T = zeros((N))
    R_T = zeros((N))
    for i in xrange(N):
        S_T[i] = std(X[:i+1])
        X_T = Y - T * Ave_T[i]
        R_T[i] = max(X_T[:i + 1]) - min(X_T[:i + 1])

    R_S = R_T / S_T
    R_S = log(R_S)
    n = log(T).reshape(N, 1)
    
    #remove NaNs from both R_S and n
    rrr = np.ndarray.flatten(n)

    dfxxx = pd.DataFrame({
    'n': rrr,
    'R_S' : R_S,
    })
    
    dfxxx = dfxxx.dropna(how='any')
    
    N = len(dfxxx)
    
    n = dfxxx['n'].values.reshape(N, 1)
    R_S = dfxxx['R_S'].values

    H = lstsq(n[1:], R_S[1:])[0]
    return H[0]

In [ ]:
def ChaosVars(dflst, col): #or dfenlst if it doesn't work. 
    #make sure you put in a['PI'] etc.
    #tau =  1 + n #embedding lag
    # n = m from fnn function, embedding dimension
    fs = 0.5 #sampling frequency, 1/2
            
    
    LZlst = []
    LLElst = []
    Hlst = []
        
    for i in dflst:
        
        data = i[col].values
        
        datalz = []
        
        thresh = np.nanmedian(data)
        for i in data:
            if i < thresh:
                datalz.append(0)
            else:
                datalz.append(1)
        
        strlz = ''.join(str(x) for x in datalz)
        
        a = ((1.0*lz_complexity(strlz))/random_lz_complexity(len(strlz),p=0.5)) #normalize
        LZlst.append(a)
        
        b = LLEcal(data)
        LLElst.append(b)
     
        try:
            d = hurst_mod(data)
            Hlst.append(d)
        except ValueError:
            Hlst.append(np.nan)
    
    dfchaos = pd.DataFrame({
    col + ' LZ': LZlst,
    col + ' LLE' : LLElst,
    col + ' H' : Hlst,
    }, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    
    return dfchaos

In [ ]:
dfcPI = ChaosVars(dfenlst, 'PI')
dfcPR = ChaosVars(dfenlst, 'PR')
dfcSpO2 = ChaosVars(dfenlst, 'SpO2')
dfcStO2 = ChaosVars(dfenlst, 'StO2')
dfcFTOE = ChaosVars(dfenlst, 'FTOE')

In [ ]:
display(dfcPI, dfcPR, dfcSpO2, dfcStO2, dfcFTOE)