In [1]:
from IPython.display import Audio
sound_file = 'beep-05.wav'

ROP Data Analysis 0.2


In [2]:
# Import Modules and Identify Low Signals
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd
from datetime import datetime, timedelta
from pandas import Series, DataFrame, concat
from scipy import stats, fftpack
from IPython.display import display
import sys
import os, errno


#Define any string with 'C' as NaN. 'C' is an indicator for the Masimo as a low signal reading
def readD(val):
    if 'C' in val:
        return np.nan
    return val

%matplotlib inline

In [3]:
Baby_lst = ['ROP003', 'ROP005', 'ROP006', 'ROP007', 'ROP008', 'ROP009', 'ROP010',
          'ROP011', 'ROP012', 'ROP013', 'ROP014', 'ROP015', 'ROP017', 'ROP018',
          'ROP019', 'ROP020', 'ROP021', 'ROP022', 'ROP023', 'ROP025']

#update this as you go

In [4]:
import ROPtimes as rt

Baby = 'ROP005' #change depending on which baby you're doing!
BabyDict = getattr(rt, Baby)

In [5]:
def mkdir_p(path):
    """"
    This function creates a folder at of the given path, unless the folder already exsists. 
    Parameters
    ----------
    path : string,
        full file path or relative file path.
    Returns
    -------
    None
    Notes
    -----
    This does not check that the file path makes sense or is formatted correctly for OS.
    Examples
    --------
    mkdir_p(User/me/my/path)
    References
    ----------
    .. [1] http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
    """
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else: raise

In [6]:
mkdir_p('Data/'+Baby)
mkdir_p('Data/'+Baby+'/plots') #makes a plots folder if does not exist
mkdir_p('Data/'+Baby+'/csv') #makes a csv folder if does not exist

Load Data


In [7]:
dfNIRSpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/NIRS/Clean/' + Baby + 'NIRS.csv',
                         parse_dates={'timestamp': ['Date',' Time']},
                         index_col='timestamp',
                         usecols=['Date', ' Time', ' Ch2 %StO2'],
                         na_values=['0'])

dfPOpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
                       parse_dates={'timestamp': ['Date','Time']},
                       index_col='timestamp',
                       usecols=['Date', 'Time', 'SpO2', 'PR'],
                       na_values=['0'])

dfPIpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
                       parse_dates={'timestamp': ['Date','Time']},
                           #Parse_dates tells the read_csv function to combine the date and time column
                       index_col='timestamp',
                       usecols=['Date', 'Time', 'PI', 'Exceptions'],
                       na_values=['0'], #Na_values is used to turn 0 into NaN
                       converters={'Exceptions':  readD}
                           #Converters: readD is the dict that means any string with 'C' with be NaN (for PI)
                       ) #

dfNIRSpre = dfNIRSpre0.rename(columns={' Ch2 %StO2': 'StO2'}) #rename NIRS column

#Clean the PI dataframe to get rid of rows that have NaN
dfPIpre = dfPIpre0.dropna(how='any')

dfPOpre = dfPIpre.combine_first(dfPOpre0)
#combine PI dataframe with the rest of the Pulse Ox dataframe after cleaning

Clean Data

- Remove all 0's and set as NaN
- Remove all PI values with low signal integrity

In [8]:
# Phone almost always equals Philips monitor

# NIRS date/time is 2 mins and 10 seconds slower than phone. Have to correct for it.
# Pulse ox date/time is 1 mins and 32 seconds faster than phone. Have to correct for it.

#This code is to make the combined dataframe come in even seconds.
#The corrected is 1 second, + for NIRS and - for PO.

TCcorrect = timedelta(seconds=1)

ncorr = dfNIRSpre.index+TCcorrect
pcorr = dfPOpre.index-TCcorrect


if dfNIRSpre.index[:1].second % 2 == 0:
    if dfPOpre.index[:1].second % 2 == 0:
        print '\nBoth NIRS and PO indices are even.'
    elif dfPOpre.index[:1].second % 2 != 0:
        print '\nNIRS even, PO odd. PO index corrected.'
        dfPOpre = dfPOpre.set_index(pcorr)
    else:
        raise NameError('Indices are messed up')
elif dfNIRSpre.index[:1].second % 2 != 0:
    if dfPOpre.index[:1].second % 2 == 0:
        print '\nNIRS odd, PO even. NIRS index corrected.'
        dfNIRSpre = dfNIRSpre.set_index(ncorr)
    elif dfPOpre.index[:1].second % 2 != 0:
        print '\nBoth NIRS and PO indices are odd. Both corrected'
        dfNIRSpre = dfNIRSpre.set_index(ncorr)
        dfPOpre = dfPOpre.set_index(pcorr)
else:
    raise NameError('Indices are messed up')

TCnirs = timedelta(minutes=2, seconds=10)
TCpo = timedelta(minutes=1, seconds=32)
# NIRS is slower than correct time, need to add TCnirs to catch it up
# PO is faster than correct time, need to subtract TCpo to slow it down

dfNIRS = dfNIRSpre.set_index([dfNIRSpre.index+TCnirs]) #NIRS Time Correction
dfPO = dfPOpre.set_index([dfPOpre.index-TCpo]) #PO Time Correction


NIRS even, PO odd. PO index corrected.

In [9]:
#for babies that only had one machine

dffakePO = pd.DataFrame({'SpO2':0, 'PR':0, 'PI':0}, index=dfNIRS.index)
dffakeNIRS = pd.DataFrame({'StO2':0}, index=dfPO.index)

if len(dfNIRS) > 5:
    if len(dfPO) > 5:
        df = dfNIRS.combine_first(dfPO) #Combine two DataFrame objects and default to non-null values in frame
        Masimo = True
        NIRS = True
        print 'Both machines on'
    elif len(dfPO) < 5:
        df = dfNIRS.combine_first(dffakePO)
        print 'Only NIRS on'
        NIRS = True
        Masimo = False
elif len(dfNIRS) < 5:
    df = dffakeNIRS.combine_first(dfPO)
    Masimo = True
    NIRS = False
    print 'Only Masimo on'
else:
    raise NameError('Check your files')


Only NIRS on

In [10]:
# save files

def saveplot(filename):
    plt.savefig('Data/'+Baby+'/plots/'+filename+'.pdf')
    
def savecsv(filename, df):
    df.to_csv('Data/'+Baby+'/csv/'+filename+'.csv')

In [11]:
blr = -(df.index[0]-BabyDict['ExamStart']).total_seconds()*0.000277778
print 'Total (h) Baseline recording = %s' % blr
aer = ((df.index[-1])-BabyDict['ExamEnd']).total_seconds()*0.000277778
print 'Total (h) After Exam recording = %s' % aer


Total (h) Baseline recording = 15.487790168
Total (h) After Exam recording = 8.514451256

In [12]:
#Convert to Relative Time and Time Windows
def reltimecalc(ROPNumber, df):
    ROPreltime = {
        'BaselineStart' : (df.index[0].to_datetime()-ROPNumber['ExamStart']).total_seconds(), #should be negative
        'BaselineEnd': (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #redundant, just for labeling sake
        'EyeDrop1' : (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #neg
        'EyeDrop2' : (ROPNumber['EyeDrop2'] - ROPNumber['ExamStart']).total_seconds(), #neg
        'EyeDrop3' : (ROPNumber['EyeDrop3'] - ROPNumber['ExamStart']).total_seconds(), #neg
        'ExamStart' : (ROPNumber['ExamStart'] - ROPNumber['ExamStart']).total_seconds(), 
        'ExamEnd' :  (ROPNumber['ExamEnd'] - ROPNumber['ExamStart']).total_seconds(), #positive number
        'DataEnd' : (df.index[-1].to_datetime()-ROPNumber['ExamStart']).total_seconds()
    }
    
    TimeWindows = {
        'tw0' : -10800.0, # three hours before 0
        'tw1' : 10800.0, # three hours after 0
        'tw2' : 21600.0, # six hoours
        'tw3' : 32400.0, # nine hours
        'tw4' : 43200.0, # 12 hours
        'tw5' : 54000.0, # 15 hours
        'tw6' : 64800.0, # 18 hours
        'tw7' : 75600.0, # 21 hours
        'tw8' : 86400.0 #24 hours
    }
    
    return ROPreltime, TimeWindows

In [13]:
tw = timedelta(hours=3)
twend = timedelta(hours=24)
df = df[(BabyDict['ExamStart']-tw):(BabyDict['ExamStart']+twend)]

In [14]:
Data, tw = reltimecalc(BabyDict, df)

In [15]:
#if after exam recording doesn't reach 24 hours, create fake rows
#necessary to make relative time accurate

dfnan = pd.DataFrame({'Exceptions' : np.NaN}, index = [0])

while len(df) < 48601:
    if len(df) == 48601:
        break
    else:
        df = df.append(dfnan)

In [16]:
df = df.set_index(np.linspace(tw['tw0'],tw['tw8'],len(df)))

In [17]:
if Masimo != False:    
    df = df[df.PI.between(0.02, 20)]
    df = df.drop('Exceptions', 1) # Drop exceptions now that you don't need it anymore
    #filter PI values to only allow  =< 0.02 and >= 20

In [18]:
if Masimo != False and NIRS != False:
    dfpreFTOE = pd.DataFrame({'SpO2' : df['SpO2'].values, 'StO2' : df['StO2'].values}, index=df.index)
    dfpreFTOE = dfpreFTOE.dropna(how='any')

    psa = dfpreFTOE['SpO2']
    prs = dfpreFTOE['StO2']

    dfFTOE = pd.DataFrame({'FTOE' : (((psa-prs)/psa)*100)})

    df = df.join(dfFTOE)

Analysis Start

Linear Measures:

Avgs, Std Dev, and Coefficient of Variation

- Avg, std dev, and CoV for every time window
- Avg every 10 sec for 4 mins of ROP exam, std dev, CoV.
- O2 DeSat Counts

In [19]:
if Masimo == False:
    df = df[['StO2']]
elif NIRS == False:
    df = df.drop(['StO2'], axis=1)
else:
    df = df

In [20]:
#create easy variables for different time epochs

a = df[tw['tw0']:0]
b = df[0:tw['tw1']]
c = df[tw['tw1']:tw['tw2']]
d = df[tw['tw2']:tw['tw3']]
e = df[tw['tw3']:tw['tw4']]
f = df[tw['tw4']:tw['tw5']]
g = df[tw['tw5']:tw['tw6']]
h = df[tw['tw6']:tw['tw7']]
i = df[tw['tw7']:tw['tw8']]

dflst = [a, b, c, d, e, f, g, h, i] #list of dataframes

twind = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'] #list of time windows

if Masimo == False:
    varlst = ['StO2']
elif NIRS == False:
    varlst = ['PI', 'PR', 'SpO2']
else:    
    varlst = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE'] #list of variables

Average every 10 Sec During ROP Exam for First Four Minutes


In [21]:
#write a function that gets the average, std, and cv of a variable for baseline and up to 4 mins

def tensec(var, graph=True, table=True):
    
    ind = list(np.arange(0, 250, 10)) #define index
    ind.insert(0, 'Baseline')
    
    bl = df[tw['tw0']:0] #define baseline time period
    
    blavg = np.nanmean(bl[var])
    blstd = np.nanstd(bl[var])
    blcv = stats.variation(bl[var], nan_policy='omit') * 100 #percent

    r =  np.arange(0, 250, 10)

    avg = [blavg]
    stdev = [blstd]
    cv = [blcv]

    for i in r:

        x = np.nanmean(df[var][i:(i+10)].values) #every 10 seconds
        y = np.nanstd(df[var][i:(i+10)].values)
        z = stats.variation((df[var][i:(i+10)]), nan_policy='omit')

        avg.append(x)
        stdev.append(y)
        cv.append(z)

    dftensec = pd.DataFrame({
    'a_mean': avg,
    'b_std dev' : stdev,
    'c_% CV' : cv,
    }, index = ind)

    dftensec.columns = [var+' mean', var+' std dev', var+' % CV']

    if graph == True:

        dftensec[var + ' mean'].plot(yerr=dftensec[var +' std dev'],
                                          color='b', ecolor='r', ) 
        
        plt.xlim(-1, 26)

        plt.title(Baby + ' '+ var + ' Mean with Std Dev')
        plt.xlabel('Time (s)')
        plt.ylabel(var)
        
        plt.xticks(np.arange(0, 25), ['Baseline', 
                                      0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100,
                                     110, 120, 130, 140, 150, 160, 170, 180, 190, 200,
                                     210, 220, 230, 240], rotation='vertical')
        
        
        saveplot(Baby + var + '_mean_stddev_10sec')

        plt.show()

    if table == True:

        display(dftensec)
        
        savecsv(Baby + var+'_mean_stddev_10sec', dftensec)

    return dftensec

In [22]:
for i in varlst:
    tensec(i)


StO2 mean StO2 std dev StO2 % CV
Baseline 85.168858 6.988948 8.205991
0 82.166667 1.343710 0.016353
10 81.500000 0.500000 0.006135
20 83.000000 1.414214 0.017039
30 85.500000 0.763763 0.008933
40 84.166667 0.372678 0.004428
50 84.833333 0.687184 0.008100
60 85.666667 1.795055 0.020954
70 79.333333 2.357023 0.029710
80 75.166667 1.213352 0.016142
90 71.333333 2.134375 0.029921
100 66.500000 1.118034 0.016813
110 71.166667 2.192158 0.030803
120 75.666667 0.745356 0.009851
130 76.666667 0.471405 0.006149
140 76.833333 1.067187 0.013890
150 73.000000 1.732051 0.023727
160 71.833333 0.897527 0.012495
170 78.166667 2.910708 0.037237
180 77.666667 1.490712 0.019194
190 77.500000 1.118034 0.014426
200 78.666667 0.745356 0.009475
210 78.333333 1.885618 0.024072
220 74.000000 2.236068 0.030217
230 67.500000 1.384437 0.020510
240 68.833333 1.863390 0.027071

In [23]:
def desat_count(df):

    #Find count of these ranges
    below = 0 # v <=80
    middle = 0 #v >= 81 and v<=84
    above = 0 #v >=85 and v<=89
    ls = []

    b_dict = {}
    m_dict = {}
    a_dict = {}

    for i, v in df['SpO2'].iteritems():

        if v <= 80: #below block

            if not ls: 
                ls.append(v)
            else:
                if ls[0] >= 81: #if the range before was not below 80

                    if len(ls) >= 5: #if the range was greater than 10 seconds, set to 5 because data points are every 2

                        if ls[0] <= 84: #was it in the middle range?
                            m_dict[middle] = ls
                            middle += 1
                            ls = [v]
                        elif ls[0] >= 85 and ls[0] <=89: #was it in the above range?
                            a_dict[above] = ls
                            above += 1
                            ls = [v]

                    else: #old list wasn't long enough to count
                        ls = [v]
                else: #if in the same range
                    ls.append(v)

        elif v >= 81 and v<= 84: #middle block

            if not ls:
                ls.append(v)
            else:
                if ls[0] <= 80 or (ls[0]>=85 and ls[0]<= 89): #if not in the middle range
                    if len(ls) >= 5: #if range was greater than 10 seconds

                        if ls[0] <= 80: #was it in the below range?
                            b_dict[below] = ls
                            below += 1
                            ls = [v]
                        elif ls[0] >= 85 and ls[0] <=89: #was it in the above range?
                            a_dict[above] = ls
                            above += 1
                            ls = [v]
                    else: #old list wasn't long enough to count
                        ls = [v]

                else:
                    ls.append(v)

        elif v >= 85 and v <=89: #above block

            if not ls:
                ls.append(v)
            else:
                if ls[0] <=84 : #if not in the above range

                    if len(ls) >= 5: #if range was greater than 
                        if ls[0] <= 80: #was it in the below range?
                            b_dict[below] = ls
                            below += 1
                            ls = [v]
                        elif ls[0] >= 81 and ls[0] <=84: #was it in the middle range?
                            m_dict[middle] = ls
                            middle += 1
                            ls = [v]
                    else: #old list wasn't long enough to count
                        ls = [v]
                else:
                    ls.append(v)

        else: #v>90 or something else weird. start the list over
            ls = []
        
    return len(b_dict), len(m_dict), len(a_dict)

In [24]:
def dfdesat_count(dflst=dflst, graph=True, table=True):
    
    if Masimo != False:
        b_datalst = []
        m_datalst = []
        a_datalst = []
        
        for i in dflst:
            b, m, a  = desat_count(i)
            b_datalst.append(b)
            m_datalst.append(m)
            a_datalst.append(a)
    
    dfresult = pd.DataFrame({'Severe DeSat' : b_datalst, 'Moderate DeSat' : m_datalst, 'Mild DeSat' : a_datalst}, index=twind)

    dfresult.index.name = 'Time Windows (h)'
    
    if graph == True:
        
        dfresult.plot.bar()
        
        plt.title(Baby + ' DeSat Counts')
        plt.xlabel('Time Windows')
        plt.ylabel('DeSat Counts')
        plt.ylim(0,dfresult.values.max())
        
        saveplot(Baby + '_desatcount')

        plt.show()

    
    if table == True:
        
        savecsv(Baby + '_desatcount', dfresult)
        display(dfresult)
        
    return dfresult

In [25]:
if Masimo != False:
    dfdesat_count(dflst)
else:
    pass

In [26]:
def linearmeasures(data): 
    
    if type(data) == pd.core.series.Series:
        a = data.values
    
    avg = np.nanmean(data) #mean
    stdev = np.nanstd(data) #std dev
    cv = stats.variation(data, nan_policy='omit') * 100 #%CoV
    
    return avg, stdev, cv

In [27]:
def dflinmeasure(variable, graph=True, table=True):
    
    datalst = []
    
    for i in dflst:
        x = linearmeasures(i[variable])
        datalst.append(x)
        
    dfresult = pd.DataFrame(datalst, columns=[variable + ' mean', variable + ' std dev', variable + ' % CV'], index=twind)
    
    dfresult.index.name = 'Time Windows (h)'
    
    if graph == True:
        
        dfresult[variable + ' mean'].plot(yerr=dfresult[variable +' std dev'],
                                          color='b', ecolor='r') 

        plt.title(Baby + ' ' + variable + ' Mean with Std Dev')
        plt.xlabel('Time Windows')
        plt.ylabel(variable)
        plt.xlim(-1,9)
        plt.xticks(np.arange(0, 9), twind, rotation='vertical')
        
        saveplot(Baby + variable + '_mean_stddev_24hrs')

        plt.show()

    if table == True:
        
        savecsv(Baby + variable + '_mean_stddev_24hrs', dfresult)
        
        display(dfresult)
        
    return dfresult

In [28]:
if Masimo != False and NIRS !=False:
    PIlin = dflinmeasure('PI')
    PRlin = dflinmeasure('PR')
    SpO2lin = dflinmeasure('SpO2')
    StO2lin = dflinmeasure('StO2')
    FTOElin = dflinmeasure('FTOE')
elif Masimo != True and NIRS == True:
    StO2lin = dflinmeasure('StO2')
elif Masimo == True and NIRS != True:
    PIlin = dflinmeasure('PI')
    PRlin = dflinmeasure('PR')
    SpO2lin = dflinmeasure('SpO2')


/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/numpy-1.11.1-py2.7-macosx-10.6-x86_64.egg/numpy/lib/nanfunctions.py:675: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/numpy-1.11.1-py2.7-macosx-10.6-x86_64.egg/numpy/lib/nanfunctions.py:1147: RuntimeWarning: Degrees of freedom <= 0 for slice.
  warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning)
/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/numpy-1.11.1-py2.7-macosx-10.6-x86_64.egg/numpy/ma/core.py:4144: UserWarning: Warning: converting a masked element to nan.
  warnings.warn("Warning: converting a masked element to nan.")
StO2 mean StO2 std dev StO2 % CV
Time Windows (h)
-3:0 85.168858 6.988948 8.20599
0:3 86.345121 6.892766 7.98281
3:6 85.357526 8.481818 9.93681
6:9 88.512589 7.367290 8.32344
9:12 NaN NaN --
12:15 NaN NaN --
15:18 NaN NaN --
18:21 NaN NaN --
21:24 NaN NaN --

Information Theory

- Mutual Information
- Fuzzy Entropy
- Cross Fuzzy Entropy

In [29]:
from sklearn.metrics import mutual_info_score
#sklearn.metrics.mutual_info_score(labels_true, labels_pred, contingency=None)

def MutualInfo(var1, var2, graph = True, table = True):

    datalst = []
    
    for i in dflst:
        
        z = i.dropna(how='any') #drop before splitting variables to make len equal
        data1 = z[var1]
        data2 = z[var2]
        
        try:
            x = mutual_info_score(data1, data2)
            datalst.append(x)
        except ValueError:
            datalst.append(np.nan)
            
    dfresult = pd.DataFrame(datalst, columns=['Mutual Info Score ' + var1 + 'x' + var2], index=twind)
    dfresult.index.name = 'Time Windows (h)'
    
    if graph == True:
        
        dfresult.plot(color='k', grid=True) 
        plt.title(Baby + ' Mutual Info Score Over Time Between ' + var1 + ' and ' +var2)
        plt.xlabel('Time Windows (h)')
        plt.ylabel(var1 + 'x' + var2 +' Mutual Info Score')

        saveplot(Baby + var1 + 'x' + var2 + 'mutualinfo')
        
        plt.show()
        
    if table == True:
        
        savecsv(Baby + var1 + 'x' + var2 + 'mutualinfo', dfresult)
        
        display(dfresult)
        
    return dfresult    

def MutualInfoLst(variable, graph=True, table=True): #cross mutualinfo with the whole list
    for i in varlst:
        x = MutualInfo(variable, i)
    return x

In [30]:
if Masimo != True:
    MutualInfo('StO2', 'StO2')
else:
    for i in varlst:
        MutualInfoLst(i)


Mutual Info Score StO2xStO2
Time Windows (h)
-3:0 3.185782
0:3 3.061220
3:6 3.125404
6:9 2.759031
9:12 NaN
12:15 NaN
15:18 NaN
18:21 NaN
21:24 NaN

In [31]:
from entropy import *

#def fuzzyen(x, dim, r, n, scale=True):
#    return entropy(x, dim, r, n=n, scale=scale, remove_baseline=True)
# n is "scale of fuzziness"

def FuzzyEnMeasure(variable, graph = True, table = True):  

    datalst = []
    
    for i in dflst:
        data = i[variable]
        data = data.dropna(how='any') #drop NaNs in series
        try:
            x = fuzzyen(data, 2, .2*np.nanstd(data), n=1) #run FuzzyEn
            datalst.append(x)
        except ValueError:
            datalst.append(np.nan)
        
    dfresult = pd.DataFrame(datalst, columns=[variable + ' FuzzyEn'], index=twind)
    dfresult.index.name = 'Time Windows (h)'
    
    if graph == True:
        
        dfresult[variable + ' FuzzyEn'].plot(color='k', grid=True) 
        plt.title(Baby + variable + ' FuzzyEn over time')
        plt.xlabel('Time Windows (h)')
        plt.ylabel(variable + ' FuzzyEn')
        
        saveplot(Baby + variable + '_FuzzyEn')
        
        plt.show()
        
    if table == True:
        
        savecsv(Baby + variable + '_FuzzyEn', dfresult)
        
        display(dfresult)
        
    return dfresult

In [32]:
if Masimo != False and NIRS !=False:
    PIFuzzyEn = FuzzyEnMeasure('PI')
    PRFuzzyEn = FuzzyEnMeasure('PR')
    SpO2FuzzyEn = FuzzyEnMeasure('SpO2')
    StO2FuzzyEn = FuzzyEnMeasure('StO2')
    FTOEFuzzyEn = FuzzyEnMeasure('FTOE')
elif Masimo != True and NIRS == True:
    StO2FuzzyEn = FuzzyEnMeasure('StO2')
elif Masimo == True and NIRS != True:
    PIFuzzyEn = FuzzyEnMeasure('PI')
    PRFuzzyEn = FuzzyEnMeasure('PR')
    SpO2FuzzyEn = FuzzyEnMeasure('SpO2')


/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/numpy-1.11.1-py2.7-macosx-10.6-x86_64.egg/numpy/core/_methods.py:59: RuntimeWarning: Mean of empty slice.
  warnings.warn("Mean of empty slice.", RuntimeWarning)
/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/numpy-1.11.1-py2.7-macosx-10.6-x86_64.egg/numpy/core/_methods.py:82: RuntimeWarning: Degrees of freedom <= 0 for slice
  warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
StO2 FuzzyEn
Time Windows (h)
-3:0 0.055378
0:3 0.051202
3:6 0.037478
6:9 0.043106
9:12 NaN
12:15 NaN
15:18 NaN
18:21 NaN
21:24 NaN

In [33]:
def CrossFuzzyMeasures(var1, var2, graph = True, table = True):

    datalst = []
    
    for i in dflst:
        z = i.dropna(how='any') #drop before splitting variables to make len equal
        data1 = z[var1]
        data2 = z[var2]
        
        try:
            x = cross_fuzzyen(data1, data2, 2, .2, n=1)
            datalst.append(x)
        except ValueError:
            datalst.append(np.nan)
        
    dfresult = pd.DataFrame(datalst, columns=['Cross FuzzyEn ' + var1 + 'x' + var2], index=twind)
    dfresult.index.name = 'Time Windows (h)'
    
    if graph == True:
        
        dfresult.plot(color='k', grid=True) 
        plt.title(Baby + ' Cross FuzzyEn Over Time Between ' + var1 + ' and ' +var2)
        plt.xlabel('Time Windows (h)')
        plt.ylabel(var1 + 'x' + var2 +' Cross FuzzyEn')
        
        saveplot(Baby + var1 + 'x' + var2 + '_XFuzzyEn')

        plt.show()
        
    if table == True:
        
        savecsv(Baby + var1 + 'x' + var2 + '_XFuzzyEn', dfresult)
        
        display(dfresult)
        
    return dfresult    

def CrossFuzzyLst(variable, graph=True, table=True):
    for i in varlst:
        x = CrossFuzzyMeasures(variable, i)
    return x

In [34]:
#run all CrossFuzzyLst
if Masimo != True:
    CrossFuzzyMeasures('StO2', 'StO2')
else:
    for i in varlst:
        CrossFuzzyLst(i)


Cross FuzzyEn StO2xStO2
Time Windows (h)
-3:0 0.303288
0:3 0.279193
3:6 0.250213
6:9 0.239984
9:12 NaN
12:15 NaN
15:18 NaN
18:21 NaN
21:24 NaN

Chaos Theory

-Lempel-Ziv complexity
-Largest Lyapunov exponent
-Hurst exponent

In [35]:
# Scripts for Chaos Theory based variables
# Import from other scientists
#

# #1
from analysis import *
    #Source: https://github.com/thelahunginjeet/pydynet/blob/master/analysis.py
    #def lz_complexity(s):
    #Aboy 2006: turn into two bit data based off threshold (threshold = median)
    #turn data into a string of 0's and 1's

# #2
def LLEcal(data):
    #Source: http://systems-sciences.uni-graz.at/etextbook/sw2/lyapunov.html
    result = []
    lambdas = []
    
    # loop through data
    for x in data:
        #take the log of the absolute of the data
        result.append(np.log(abs(x))) #np.log = ln
        # take average
        lambdas.append(mean(result))
    
    return max(lambdas)


# #3
def hurst_mod(data):
    
    #Source:  http://pyeeg.sourceforge.net/index.html?highlight=hurst#pyeeg.hurst
    #modified to drop NaNs
    
    X = data
    N = len(X)

    T = array([float(i) for i in xrange(1,N+1)])
    
    Q = pd.Series(X)
    Y = Q.cumsum()
    
    Ave_T = Y.values/T

    S_T = zeros((N))
    R_T = zeros((N))
    
    for i in xrange(N):
        S_T[i] = std(X[:i+1])
        X_T = Y - T * Ave_T[i]
        R_T[i] = max(X_T[:i + 1]) - min(X_T[:i + 1])

    R_S = R_T / S_T
    R_S = log(R_S)
    n = log(T).reshape(N, 1)
    
    #remove NaNs from both R_S and n
    rrr = np.ndarray.flatten(n)

    dfxxx = pd.DataFrame({
    'n': rrr,
    'R_S' : R_S,
    })
    
    dfxxx = dfxxx.dropna(how='any')
    
    N = len(dfxxx)
    
    n = dfxxx['n'].values.reshape(N, 1)
    R_S = dfxxx['R_S'].values

    H = lstsq(n[1:], R_S[1:])[0]
    return H[0]

In [46]:
def LZ(var, graph=True, table=True):
    
    LZlst = []
    
    for i in dflst:

        try:
            data = i[var]
            datalz = []
            thresh = np.nanmedian(data)
            
            if data.isnull().sum() == len(data): #if all NaN
                LZlst.append(np.nan)
                pass
            
            else:

                for i in data:
                    if i < thresh:
                        datalz.append(0)
                    else:
                        datalz.append(1)
                strlz = ''.join(str(x) for x in datalz)

                a = ((1.0*lz_complexity(strlz))/random_lz_complexity(len(strlz),p=0.5)) #normalize
                LZlst.append(a)
            
        except IndexError:
            LZlst.append(np.nan)
    
    dfresult = pd.DataFrame({var + ' LZ': LZlst}, index=twind)
    
    if graph == True:
        
        dfresult.plot(color='k', grid=True) 
        plt.title(Baby + ' Lempel-Ziv Complexity of '+var+' over time')
        plt.xlabel('Time Windows (h)')
        plt.ylabel('Lempel-Ziv Complexity')
        
        saveplot(Baby + var + '_LZComp')

        plt.show()
    
    if table == True:
        
        savecsv(Baby + var + '_LZComp', dfresult)
        
        display(dfresult)
    
    return dfresult

In [47]:
def LLE(var, graph=True, table=True):
    
    LLElst = []
    
    for i in dflst:
        data = i[var].values
        
        try:
            b = LLEcal(data)
            LLElst.append(b)
        except ValueError:
            LLElst.append(np.nan)

    dfresult = pd.DataFrame({var + ' LLE': LLElst}, index=twind)
    
    if graph == True:
        
        dfresult.plot(color='k', grid=True) 
        plt.title(Baby + ' Largest Lyapunov Exponent of '+var+' over time')
        plt.xlabel('Time Windows (h)')
        plt.ylabel('Largest Lyapunov Exponent')

        saveplot(Baby + var + '_LLE')
        
        plt.show()
    
    if table == True:
        
        savecsv(Baby + var + '_LLE', dfresult)
        
        display(dfresult)
    
    return dfresult

In [48]:
def Hurst(var, graph=True, table=True):
    
    Hlst = []
    
    for i in dflst:
        
        data = i[var]
        data = data.dropna(how='any')
        data = data.values
        
        try:
            d = hurst_mod(data)
            Hlst.append(d)
        except ValueError:
            Hlst.append(np.nan)
    
    dfresult = pd.DataFrame({var + ' Hurst Exp': Hlst}, index=twind)
    
    if graph == True:
        
        dfresult.plot(color='k', grid=True) 
        plt.title(Baby + ' Hurst Exponent of '+var+' over time')
        plt.xlabel('Time Windows (h)')
        plt.ylabel('Hurst Exponent')

        saveplot(Baby + var + '_Hurst')
        
        plt.show()
    
    if table == True:
        
        savecsv(Baby + var + '_Hurst', dfresult)
        
        display(dfresult)
    
    return dfresult

In [49]:
for i in varlst:
    LZ(i)
    LLE(i)
    Hurst(i)
        
Audio(url=sound_file, autoplay=True)


StO2 LZ
-3:0 0.185951
0:3 0.172177
3:6 0.123967
6:9 0.197429
9:12 NaN
12:15 NaN
15:18 NaN
18:21 NaN
21:24 NaN
StO2 LLE
-3:0 4.543295
0:3 4.466478
3:6 4.477200
6:9 4.530414
9:12 NaN
12:15 NaN
15:18 NaN
18:21 NaN
21:24 NaN
StO2 Hurst Exp
-3:0 0.773863
0:3 0.828587
3:6 0.807434
6:9 0.808270
9:12 NaN
12:15 NaN
15:18 NaN
18:21 NaN
21:24 NaN
Out[49]:

In [ ]: