In [1]:
from IPython.display import Audio
sound_file = 'beep-05.wav'
In [2]:
# Import Modules and Identify Low Signals
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd
from datetime import datetime, timedelta
from pandas import Series, DataFrame, concat
from scipy import stats, fftpack
from IPython.display import display
import sys
import os, errno
#Define any string with 'C' as NaN. 'C' is an indicator for the Masimo as a low signal reading
def readD(val):
if 'C' in val:
return np.nan
return val
%matplotlib inline
In [3]:
Baby_lst = ['ROP003', 'ROP005', 'ROP006', 'ROP007', 'ROP008', 'ROP009', 'ROP010',
'ROP011', 'ROP012', 'ROP013', 'ROP014', 'ROP015', 'ROP017', 'ROP018',
'ROP019', 'ROP020', 'ROP021', 'ROP022', 'ROP023', 'ROP025']
#update this as you go
In [4]:
import ROPtimes as rt
Baby = 'ROP003' #change depending on which baby you're doing!
BabyDict = getattr(rt, Baby)
In [5]:
def mkdir_p(path):
""""
This function creates a folder at of the given path, unless the folder already exsists.
Parameters
----------
path : string,
full file path or relative file path.
Returns
-------
None
Notes
-----
This does not check that the file path makes sense or is formatted correctly for OS.
Examples
--------
mkdir_p(User/me/my/path)
References
----------
.. [1] http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
"""
try:
os.makedirs(path)
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(path):
pass
else: raise
In [6]:
mkdir_p('Data/'+Baby)
mkdir_p('Data/'+Baby+'/plots') #makes a plots folder if does not exist
mkdir_p('Data/'+Baby+'/csv') #makes a csv folder if does not exist
In [7]:
dfNIRSpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/NIRS/Clean/' + Baby + 'NIRS.csv',
parse_dates={'timestamp': ['Date',' Time']},
index_col='timestamp',
usecols=['Date', ' Time', ' Ch2 %StO2'],
na_values=['0'])
dfPOpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
parse_dates={'timestamp': ['Date','Time']},
index_col='timestamp',
usecols=['Date', 'Time', 'SpO2', 'PR'],
na_values=['0'])
dfPIpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
parse_dates={'timestamp': ['Date','Time']},
#Parse_dates tells the read_csv function to combine the date and time column
index_col='timestamp',
usecols=['Date', 'Time', 'PI', 'Exceptions'],
na_values=['0'], #Na_values is used to turn 0 into NaN
converters={'Exceptions': readD}
#Converters: readD is the dict that means any string with 'C' with be NaN (for PI)
) #
dfNIRSpre = dfNIRSpre0.rename(columns={' Ch2 %StO2': 'StO2'}) #rename NIRS column
#Clean the PI dataframe to get rid of rows that have NaN
dfPIpre = dfPIpre0.dropna(how='any')
dfPOpre = dfPIpre.combine_first(dfPOpre0)
#combine PI dataframe with the rest of the Pulse Ox dataframe after cleaning
In [8]:
# Phone almost always equals Philips monitor
# NIRS date/time is 2 mins and 10 seconds slower than phone. Have to correct for it.
# Pulse ox date/time is 1 mins and 32 seconds faster than phone. Have to correct for it.
#This code is to make the combined dataframe come in even seconds.
#The corrected is 1 second, + for NIRS and - for PO.
TCcorrect = timedelta(seconds=1)
ncorr = dfNIRSpre.index+TCcorrect
pcorr = dfPOpre.index-TCcorrect
if dfNIRSpre.index[:1].second % 2 == 0:
if dfPOpre.index[:1].second % 2 == 0:
print '\nBoth NIRS and PO indices are even.'
elif dfPOpre.index[:1].second % 2 != 0:
print '\nNIRS even, PO odd. PO index corrected.'
dfPOpre = dfPOpre.set_index(pcorr)
else:
raise NameError('Indices are messed up')
elif dfNIRSpre.index[:1].second % 2 != 0:
if dfPOpre.index[:1].second % 2 == 0:
print '\nNIRS odd, PO even. NIRS index corrected.'
dfNIRSpre = dfNIRSpre.set_index(ncorr)
elif dfPOpre.index[:1].second % 2 != 0:
print '\nBoth NIRS and PO indices are odd. Both corrected'
dfNIRSpre = dfNIRSpre.set_index(ncorr)
dfPOpre = dfPOpre.set_index(pcorr)
else:
raise NameError('Indices are messed up')
TCnirs = timedelta(minutes=2, seconds=10)
TCpo = timedelta(minutes=1, seconds=32)
# NIRS is slower than correct time, need to add TCnirs to catch it up
# PO is faster than correct time, need to subtract TCpo to slow it down
dfNIRS = dfNIRSpre.set_index([dfNIRSpre.index+TCnirs]) #NIRS Time Correction
dfPO = dfPOpre.set_index([dfPOpre.index-TCpo]) #PO Time Correction
In [9]:
#for babies that only had one machine
dffakePO = pd.DataFrame({'SpO2':0, 'PR':0, 'PI':0}, index=dfNIRS.index)
dffakeNIRS = pd.DataFrame({'StO2':0}, index=dfPO.index)
if len(dfNIRS) > 5:
if len(dfPO) > 5:
df = dfNIRS.combine_first(dfPO) #Combine two DataFrame objects and default to non-null values in frame
Masimo = True
NIRS = True
print 'Both machines on'
elif len(dfPO) < 5:
df = dfNIRS.combine_first(dffakePO)
print 'Only NIRS on'
NIRS = True
Masimo = False
elif len(dfNIRS) < 5:
df = dffakeNIRS.combine_first(dfPO)
Masimo = True
NIRS = False
print 'Only Masimo on'
else:
raise NameError('Check your files')
In [10]:
# save files
def saveplot(filename):
plt.savefig('Data/'+Baby+'/plots/'+'BDA_'+filename+'.pdf')
def savecsv(filename, df):
df.to_csv('Data/'+Baby+'/csv/'+'BDA_'+filename+'.csv')
In [11]:
blr = -(df.index[0]-BabyDict['ExamStart']).total_seconds()*0.000277778
print 'Total (h) Baseline recording = %s' % blr
aer = ((df.index[-1])-BabyDict['ExamEnd']).total_seconds()*0.000277778
print 'Total (h) After Exam recording = %s' % aer
In [12]:
#Convert to Relative Time and Time Windows
def reltimecalc(ROPNumber, df):
ROPreltime = {
'BaselineStart' : (df.index[0].to_datetime()-ROPNumber['ExamStart']).total_seconds(), #should be negative
'BaselineEnd': (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #redundant, just for labeling sake
'EyeDrop1' : (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #neg
'EyeDrop2' : (ROPNumber['EyeDrop2'] - ROPNumber['ExamStart']).total_seconds(), #neg
'EyeDrop3' : (ROPNumber['EyeDrop3'] - ROPNumber['ExamStart']).total_seconds(), #neg
'ExamStart' : (ROPNumber['ExamStart'] - ROPNumber['ExamStart']).total_seconds(),
'ExamEnd' : (ROPNumber['ExamEnd'] - ROPNumber['ExamStart']).total_seconds(), #positive number
'DataEnd' : (df.index[-1].to_datetime()-ROPNumber['ExamStart']).total_seconds()
}
TimeWindows = {
'tw0' : -10800.0, # three hours before 0
'tw1' : 10800.0, # three hours after 0
'tw2' : 21600.0, # six hoours
'tw3' : 32400.0, # nine hours
'tw4' : 43200.0, # 12 hours
'tw5' : 54000.0, # 15 hours
'tw6' : 64800.0, # 18 hours
'tw7' : 75600.0, # 21 hours
'tw8' : 86400.0 #24 hours
}
return ROPreltime, TimeWindows
In [13]:
tw = timedelta(hours=3)
twend = timedelta(hours=24)
df = df[(BabyDict['ExamStart']-tw):(BabyDict['ExamStart']+twend)]
In [14]:
Data, tw = reltimecalc(BabyDict, df)
In [15]:
#if after exam recording doesn't reach 24 hours, create fake rows
#necessary to make relative time accurate
dfnan = pd.DataFrame({'Exceptions' : np.NaN}, index = [0])
while len(df) < 48601:
if len(df) == 48601:
break
else:
df = df.append(dfnan)
In [16]:
df = df.set_index(np.linspace(tw['tw0'],tw['tw8'],len(df)))
In [17]:
if Masimo != False:
df = df[df.PI.between(0.02, 20)]
df = df.drop('Exceptions', 1) # Drop exceptions now that you don't need it anymore
#filter PI values to only allow =< 0.02 and >= 20
In [18]:
if Masimo != False and NIRS != False:
dfpreFTOE = pd.DataFrame({'SpO2' : df['SpO2'].values, 'StO2' : df['StO2'].values}, index=df.index)
dfpreFTOE = dfpreFTOE.dropna(how='any')
psa = dfpreFTOE['SpO2']
prs = dfpreFTOE['StO2']
dfFTOE = pd.DataFrame({'FTOE' : (((psa-prs)/psa)*100)})
df = df.join(dfFTOE)
In [19]:
if Masimo == False:
df = df[['StO2']]
elif NIRS == False:
df = df.drop(['StO2'], axis=1)
else:
df = df
In [20]:
#create easy variables for different time epochs
a = df[tw['tw0']:0] #before
b = df[0:240] #during
c = df[240:tw['tw4']] #after 12
d = df[240:tw['tw8']] #after 24
if aer <= 12:
d = df[tw['tw4']:tw['tw8']]
d[d > 0] = np.NaN
dflst = [a, b, c, d] #list of dataframes
twind = ['Before','During','12H After','24H After'] #list of time windows
if Masimo == False:
varlst = ['StO2']
elif NIRS == False:
varlst = ['PI', 'PR', 'SpO2']
else:
varlst = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE'] #list of variables
In [21]:
def linearmeasures(data):
if type(data) == pd.core.series.Series:
a = data.values
avg = np.nanmean(data) #mean
stdev = np.nanstd(data) #std dev
cv = stats.variation(data, nan_policy='omit') * 100 #%CoV
return avg, stdev, cv
In [22]:
def dflinmeasure(variable, graph=True, table=True):
datalst = []
for i in dflst:
x = linearmeasures(i[variable])
datalst.append(x)
dfresult = pd.DataFrame(datalst, columns=[variable + ' mean', variable + ' std dev', variable + ' % CV'], index=twind)
dfresult.index.name = 'Time Windows (h)'
if graph == True:
dfresult[variable + ' mean'].plot(yerr=dfresult[variable +' std dev'],
color='b', ecolor='r')
plt.title(Baby + ' ' + variable + ' Mean with Std Dev')
plt.xlabel('Time Windows')
plt.ylabel(variable)
plt.xlim(-1,9)
plt.xticks(np.arange(0, 9), twind, rotation='vertical')
saveplot(Baby + variable + '_mean_stddev_24hrs')
plt.show()
if table == True:
savecsv(Baby + variable + '_mean_stddev_24hrs', dfresult)
display(dfresult)
return dfresult
In [23]:
if Masimo != False and NIRS !=False:
PIlin = dflinmeasure('PI')
PRlin = dflinmeasure('PR')
SpO2lin = dflinmeasure('SpO2')
StO2lin = dflinmeasure('StO2')
FTOElin = dflinmeasure('FTOE')
elif Masimo != True and NIRS == True:
StO2lin = dflinmeasure('StO2')
elif Masimo == True and NIRS != True:
PIlin = dflinmeasure('PI')
PRlin = dflinmeasure('PR')
SpO2lin = dflinmeasure('SpO2')
In [24]:
from sklearn.metrics import mutual_info_score
#sklearn.metrics.mutual_info_score(labels_true, labels_pred, contingency=None)
def MutualInfo(var1, var2, graph = True, table = True):
datalst = []
for i in dflst:
z = i.dropna(how='any') #drop before splitting variables to make len equal
data1 = z[var1]
data2 = z[var2]
try:
x = mutual_info_score(data1, data2)
datalst.append(x)
except ValueError:
datalst.append(np.nan)
dfresult = pd.DataFrame(datalst, columns=['Mutual Info Score ' + var1 + 'x' + var2], index=twind)
dfresult.index.name = 'Time Windows (h)'
if graph == True:
dfresult.plot(color='k', grid=True)
plt.title(Baby + ' Mutual Info Score Over Time Between ' + var1 + ' and ' +var2)
plt.xlabel('Time Windows (h)')
plt.ylabel(var1 + 'x' + var2 +' Mutual Info Score')
saveplot(Baby + var1 + 'x' + var2 + 'mutualinfo')
plt.show()
if table == True:
savecsv(Baby + var1 + 'x' + var2 + 'mutualinfo', dfresult)
display(dfresult)
return dfresult
def MutualInfoLst(variable, graph=True, table=True): #cross mutualinfo with the whole list
for i in varlst:
x = MutualInfo(variable, i)
return x
In [25]:
if Masimo != True:
MutualInfo('StO2', 'StO2')
else:
for i in varlst:
MutualInfoLst(i)
In [26]:
from entropy import *
#def fuzzyen(x, dim, r, n, scale=True):
# return entropy(x, dim, r, n=n, scale=scale, remove_baseline=True)
# n is "scale of fuzziness"
def FuzzyEnMeasure(variable, graph = True, table = True):
datalst = []
for i in dflst:
data = i[variable]
data = data.dropna(how='any') #drop NaNs in series
try:
x = fuzzyen(data, 2, .2*np.nanstd(data), n=1) #run FuzzyEn
datalst.append(x)
except ValueError:
datalst.append(np.nan)
dfresult = pd.DataFrame(datalst, columns=[variable + ' FuzzyEn'], index=twind)
dfresult.index.name = 'Time Windows (h)'
if graph == True:
dfresult[variable + ' FuzzyEn'].plot(color='k', grid=True)
plt.title(Baby + variable + ' FuzzyEn over time')
plt.xlabel('Time Windows (h)')
plt.ylabel(variable + ' FuzzyEn')
saveplot(Baby + variable + '_FuzzyEn')
plt.show()
if table == True:
savecsv(Baby + variable + '_FuzzyEn', dfresult)
display(dfresult)
return dfresult
In [27]:
if Masimo != False and NIRS !=False:
PIFuzzyEn = FuzzyEnMeasure('PI')
PRFuzzyEn = FuzzyEnMeasure('PR')
SpO2FuzzyEn = FuzzyEnMeasure('SpO2')
StO2FuzzyEn = FuzzyEnMeasure('StO2')
FTOEFuzzyEn = FuzzyEnMeasure('FTOE')
elif Masimo != True and NIRS == True:
StO2FuzzyEn = FuzzyEnMeasure('StO2')
elif Masimo == True and NIRS != True:
PIFuzzyEn = FuzzyEnMeasure('PI')
PRFuzzyEn = FuzzyEnMeasure('PR')
SpO2FuzzyEn = FuzzyEnMeasure('SpO2')
In [28]:
def CrossFuzzyMeasures(var1, var2, graph = True, table = True):
datalst = []
for i in dflst:
z = i.dropna(how='any') #drop before splitting variables to make len equal
data1 = z[var1]
data2 = z[var2]
try:
x = cross_fuzzyen(data1, data2, 2, .2, n=1)
datalst.append(x)
except ValueError:
datalst.append(np.nan)
dfresult = pd.DataFrame(datalst, columns=['Cross FuzzyEn ' + var1 + 'x' + var2], index=twind)
dfresult.index.name = 'Time Windows (h)'
if graph == True:
dfresult.plot(color='k', grid=True)
plt.title(Baby + ' Cross FuzzyEn Over Time Between ' + var1 + ' and ' +var2)
plt.xlabel('Time Windows (h)')
plt.ylabel(var1 + 'x' + var2 +' Cross FuzzyEn')
saveplot(Baby + var1 + 'x' + var2 + '_XFuzzyEn')
plt.show()
if table == True:
savecsv(Baby + var1 + 'x' + var2 + '_XFuzzyEn', dfresult)
display(dfresult)
return dfresult
def CrossFuzzyLst(variable, graph=True, table=True):
for i in varlst:
x = CrossFuzzyMeasures(variable, i)
return x
In [29]:
#run all CrossFuzzyLst
if Masimo != True:
CrossFuzzyMeasures('StO2', 'StO2')
else:
for i in varlst:
CrossFuzzyLst(i)
In [30]:
# Scripts for Chaos Theory based variables
# Import from other scientists
#
# #1
from analysis import *
#Source: https://github.com/thelahunginjeet/pydynet/blob/master/analysis.py
#def lz_complexity(s):
#Aboy 2006: turn into two bit data based off threshold (threshold = median)
#turn data into a string of 0's and 1's
# #2
def LLEcal(data):
#Source: http://systems-sciences.uni-graz.at/etextbook/sw2/lyapunov.html
result = []
lambdas = []
# loop through data
for x in data:
#take the log of the absolute of the data
result.append(np.log(abs(x))) #np.log = ln
# take average
lambdas.append(mean(result))
return max(lambdas)
# #3
def hurst_mod(data):
#Source: http://pyeeg.sourceforge.net/index.html?highlight=hurst#pyeeg.hurst
#modified to drop NaNs
X = data
N = len(X)
T = array([float(i) for i in xrange(1,N+1)])
Q = pd.Series(X)
Y = Q.cumsum()
Ave_T = Y.values/T
S_T = zeros((N))
R_T = zeros((N))
for i in xrange(N):
S_T[i] = std(X[:i+1])
X_T = Y - T * Ave_T[i]
R_T[i] = max(X_T[:i + 1]) - min(X_T[:i + 1])
R_S = R_T / S_T
R_S = log(R_S)
n = log(T).reshape(N, 1)
#remove NaNs from both R_S and n
rrr = np.ndarray.flatten(n)
dfxxx = pd.DataFrame({
'n': rrr,
'R_S' : R_S,
})
dfxxx = dfxxx.dropna(how='any')
N = len(dfxxx)
n = dfxxx['n'].values.reshape(N, 1)
R_S = dfxxx['R_S'].values
H = lstsq(n[1:], R_S[1:])[0]
return H[0]
In [31]:
def LZ(var, graph=True, table=True):
LZlst = []
for i in dflst:
try:
data = i[var]
datalz = []
thresh = np.nanmedian(data)
if data.isnull().sum() == len(data): #if all NaN
LZlst.append(np.nan)
pass
else:
for i in data:
if i < thresh:
datalz.append(0)
else:
datalz.append(1)
strlz = ''.join(str(x) for x in datalz)
a = ((1.0*lz_complexity(strlz))/random_lz_complexity(len(strlz),p=0.5)) #normalize
LZlst.append(a)
except IndexError:
LZlst.append(np.nan)
dfresult = pd.DataFrame({var + ' LZ': LZlst}, index=twind)
if graph == True:
dfresult.plot(color='k', grid=True)
plt.title(Baby + ' Lempel-Ziv Complexity of '+var+' over time')
plt.xlabel('Time Windows (h)')
plt.ylabel('Lempel-Ziv Complexity')
saveplot(Baby + var + '_LZComp')
plt.show()
if table == True:
savecsv(Baby + var + '_LZComp', dfresult)
display(dfresult)
return dfresult
In [32]:
def LLE(var, graph=True, table=True):
LLElst = []
for i in dflst:
data = i[var].values
try:
b = LLEcal(data)
LLElst.append(b)
except ValueError:
LLElst.append(np.nan)
dfresult = pd.DataFrame({var + ' LLE': LLElst}, index=twind)
if graph == True:
dfresult.plot(color='k', grid=True)
plt.title(Baby + ' Largest Lyapunov Exponent of '+var+' over time')
plt.xlabel('Time Windows (h)')
plt.ylabel('Largest Lyapunov Exponent')
saveplot(Baby + var + '_LLE')
plt.show()
if table == True:
savecsv(Baby + var + '_LLE', dfresult)
display(dfresult)
return dfresult
In [33]:
def Hurst(var, graph=True, table=True):
Hlst = []
for i in dflst:
data = i[var]
data = data.dropna(how='any')
data = data.values
try:
d = hurst_mod(data)
Hlst.append(d)
except ValueError:
Hlst.append(np.nan)
dfresult = pd.DataFrame({var + ' Hurst Exp': Hlst}, index=twind)
if graph == True:
dfresult.plot(color='k', grid=True)
plt.title(Baby + ' Hurst Exponent of '+var+' over time')
plt.xlabel('Time Windows (h)')
plt.ylabel('Hurst Exponent')
saveplot(Baby + var + '_Hurst')
plt.show()
if table == True:
savecsv(Baby + var + '_Hurst', dfresult)
display(dfresult)
return dfresult
In [34]:
for i in varlst:
LZ(i)
LLE(i)
Hurst(i)
Audio(url=sound_file, autoplay=True)
Out[34]:
In [ ]: