In [90]:
# Import Modules and Identify Low Signals
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd
from datetime import datetime, timedelta
from pandas import Series, DataFrame, concat
import scipy
import scipy.stats
import scipy.fftpack
#Treat any string containing 'C' as NaN; 'C' is the Masimo's flag for a low-signal reading
def readD(val):
if 'C' in val:
return np.nan
return val
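# Illustrative check (hypothetical values): 'C'-flagged strings become NaN,
# everything else passes through unchanged.
assert readD('1.2') == '1.2'
assert np.isnan(readD('C04'))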
%matplotlib inline
In [91]:
import ROPtimes as rt
Baby = 'ROP015' #change depending on which baby you're analyzing
BabyDict = getattr(rt, Baby)
In [92]:
dfNIRSpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/NIRS/Clean/' + Baby + 'NIRS.csv',
parse_dates={'timestamp': ['Date',' Time']},
index_col='timestamp',
usecols=['Date', ' Time', ' Ch2 %StO2'],
na_values=['0'])
dfPOpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
parse_dates={'timestamp': ['Date','Time']},
index_col='timestamp',
usecols=['Date', 'Time', 'SpO2', 'PR'],
na_values=['0'])
dfPIpre0 = pd.read_csv('/Users/John/Dropbox/LLU/Projects/Retinopathy of Prematurity/Pulse Ox/' + Baby + 'PO.csv',
parse_dates={'timestamp': ['Date','Time']},
index_col='timestamp',
usecols=['Date', 'Time', 'PI', 'Exceptions'],
na_values=['0'],
converters={'Exceptions': readD}
) #build a separate PI dataframe so it can be cleaned independently
dfNIRSpre = dfNIRSpre0.rename(columns={' Ch2 %StO2': 'StO2'}) #rename NIRS column
#Clean the PI dataframe: drop rows with NaN in either PI or Exceptions
dfPIpre = dfPIpre0[dfPIpre0.loc[:, ['PI', 'Exceptions']].notnull().all(1)]
dfPOpre = dfPIpre.combine_first(dfPOpre0)
#combine the cleaned PI dataframe with the rest of the Pulse Ox dataframe
'''
parse_dates tells read_csv to combine the Date and Time columns
into a single timestamp column and parse it as a timestamp.
Pandas can parse dates written in a variety of formats.
index_col sets the timestamp column as the index.
usecols restricts read_csv to the listed subset of columns.
na_values turns 0 into NaN.
converters maps the Exceptions column through readD, so any string
containing 'C' becomes NaN (low-signal PI readings).
'''
Out[92]:
In [93]:
# Phone time almost always equals the Philips monitor time.
# NIRS date/time runs 2 min 10 s behind the phone; correct for it.
# Pulse ox date/time runs 1 min 32 s ahead of the phone; correct for it.
# This code aligns the combined dataframe on even seconds.
# The parity correction is 1 second: + for NIRS, - for PO.
TCcorrect = timedelta(seconds=1)
ncorr = dfNIRSpre.index+TCcorrect
pcorr = dfPOpre.index-TCcorrect
if dfNIRSpre.index[0].second % 2 == 0:
    if dfPOpre.index[0].second % 2 == 0:
        print '\nBoth NIRS and PO indices are even.'
    elif dfPOpre.index[0].second % 2 != 0:
        print '\nNIRS even, PO odd. PO index corrected.'
        dfPOpre = dfPOpre.set_index(pcorr)
    else:
        raise NameError('Indices are messed up')
elif dfNIRSpre.index[0].second % 2 != 0:
    if dfPOpre.index[0].second % 2 == 0:
        print '\nNIRS odd, PO even. NIRS index corrected.'
        dfNIRSpre = dfNIRSpre.set_index(ncorr)
    elif dfPOpre.index[0].second % 2 != 0:
        print '\nBoth NIRS and PO indices are odd. Both corrected.'
        dfNIRSpre = dfNIRSpre.set_index(ncorr)
        dfPOpre = dfPOpre.set_index(pcorr)
else:
    raise NameError('Indices are messed up')
TCnirs = timedelta(minutes=2, seconds=10)
TCpo = timedelta(minutes=1, seconds=32)
# NIRS is slower than correct time, need to add TCnirs to catch it up
# PO is faster than correct time, need to subtract TCpo to slow it down
dfNIRS = dfNIRSpre.set_index([dfNIRSpre.index+TCnirs]) #NIRS Time Correction
dfPO = dfPOpre.set_index([dfPOpre.index-TCpo]) #PO Time Correction
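# Sanity check (illustrative): after the parity and clock corrections both
# streams should start on even seconds and overlap in time.
print 'NIRS starts %s, PO starts %s' % (dfNIRS.index[0], dfPO.index[0])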
In [94]:
#for babies that only had one machine
dffakePO = pd.DataFrame({'SpO2':0, 'PR':0, 'PI':0}, index=dfNIRS.index)
dffakeNIRS = pd.DataFrame({'StO2':0}, index=dfPO.index)
if len(dfNIRS) > 5:
if len(dfPO) > 5:
df = dfNIRS.combine_first(dfPO) #combine the two DataFrames, preferring dfNIRS values and filling gaps from dfPO
print 'Both machines on'
elif len(dfPO) < 5:
df = dfNIRS.combine_first(dffakePO)
print 'Only NIRS on'
elif len(dfNIRS) < 5:
df = dffakeNIRS.combine_first(dfPO)
print 'Only Masimo on'
else:
raise NameError('Check your files')
In [95]:
a = -(df.index[0]-BabyDict['ExamStart']).total_seconds()/3600.0 #seconds -> hours
print 'Total (h) Baseline recording = %s' % a
b = ((df.index[-1])-BabyDict['ExamEnd']).total_seconds()/3600.0
print 'Total (h) After Exam recording = %s' % b
In [96]:
#Convert to Relative Time and Time Windows
def reltimecalc(ROPNumber, df):
ROPreltime = {
'BaselineStart' : (df.index[0].to_datetime()-ROPNumber['ExamStart']).total_seconds(), #should be negative
'BaselineEnd': (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #redundant, just for labeling sake
'EyeDrop1' : (ROPNumber['EyeDrop1'] - ROPNumber['ExamStart']).total_seconds(), #neg
'EyeDrop2' : (ROPNumber['EyeDrop2'] - ROPNumber['ExamStart']).total_seconds(), #neg
'EyeDrop3' : (ROPNumber['EyeDrop3'] - ROPNumber['ExamStart']).total_seconds(), #neg
'ExamStart' : (ROPNumber['ExamStart'] - ROPNumber['ExamStart']).total_seconds(),
'ExamEnd' : (ROPNumber['ExamEnd'] - ROPNumber['ExamStart']).total_seconds(), #positive number
'DataEnd' : (df.index[-1].to_datetime()-ROPNumber['ExamStart']).total_seconds()
}
TimeWindows = {
'tw0' : -10800.0, # three hours before 0
'tw1' : 10800.0, # three hours after 0
'tw2' : 21600.0, # six hours
'tw3' : 32400.0, # nine hours
'tw4' : 43200.0, # 12 hours
'tw5' : 54000.0, # 15 hours
'tw6' : 64800.0, # 18 hours
'tw7' : 75600.0, # 21 hours
'tw8' : 86400.0 #24 hours
}
return ROPreltime, TimeWindows
In [97]:
tw = timedelta(hours=3)
twend = timedelta(hours=24)
df = df[(BabyDict['ExamStart']-tw):(BabyDict['ExamStart']+twend)]
In [98]:
Data, tw = reltimecalc(BabyDict, df)
In [99]:
df = df.set_index(np.linspace(tw['tw0'],tw['tw8'],len(df)))
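# Note: reindexing with linspace assumes evenly spaced samples (nominally one
# every 2 s, no gaps); the new index runs from -10800 s to +86400 s relative
# to exam start.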
In [100]:
df = df[df.PI.between(0.02, 20)] #keep only PI values between 0.02 and 20
df = df.drop('Exceptions', 1) #drop Exceptions now that it is no longer needed
dfen = df.dropna(how='any') #df with all NaN rows removed
In [101]:
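# Fractional tissue oxygen extraction: FTOE = (SpO2 - StO2)/SpO2 * 100,
# i.e., the percentage of arterially delivered oxygen that tissue extracts.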
psa = dfen['SpO2'].values
prs = dfen['StO2'].values
FTOE = pd.Series(((psa-prs)/psa)*100)
dfen = dfen.assign(FTOE=FTOE.values)
In [102]:
a = df[tw['tw0']:0]
b = df[0:tw['tw1']]
c = df[tw['tw1']:tw['tw2']]
d = df[tw['tw2']:tw['tw3']]
e = df[tw['tw3']:tw['tw4']]
f = df[tw['tw4']:tw['tw5']]
g = df[tw['tw5']:tw['tw6']]
h = df[tw['tw6']:tw['tw7']]
i = df[tw['tw7']:tw['tw8']]
dflst = [a, b, c, d, e, f, g, h, i]
tlst = [tw['tw0'], 0, tw['tw1'], tw['tw2'], tw['tw3'], tw['tw4'], tw['tw5'],
tw['tw6'], tw['tw7'], tw['tw8']]
#dfen for no na values
ea = dfen[tw['tw0']:0]
eb = dfen[0:tw['tw1']]
ec = dfen[tw['tw1']:tw['tw2']]
ed = dfen[tw['tw2']:tw['tw3']]
ee = dfen[tw['tw3']:tw['tw4']]
ef = dfen[tw['tw4']:tw['tw5']]
eg = dfen[tw['tw5']:tw['tw6']]
eh = dfen[tw['tw6']:tw['tw7']]
ei = dfen[tw['tw7']:tw['tw8']]
dfenlst = [ea, eb, ec, ed, ee, ef, eg, eh, ei] #basically same dataframe but all NaNs dropped
#some functions return NaN if even one NaN is present, which is why dfen is needed
#the entropy functions modify their input in place, so make independent copies
dfenlstcopy1 = [] #use this for entropy measures
dfenlstcopy2 = [] #use this for cross entropy measures
dfenlstcopy3 = [] #use this for baseline cross entropy measures
for i in dfenlst:
    dfenlstcopy1.append(i.copy(deep=True))
    dfenlstcopy2.append(i.copy(deep=True))
    dfenlstcopy3.append(i.copy(deep=True))
In [103]:
#AVERAGE DURING ROP EXAM FOR FIRST FOUR MINUTES
#tensec expects Series inputs, e.g. a['PI']
def tensec(a, b): #a = baseline values, b = next time epoch
blavg = np.nanmean(a.values)
blstd = np.nanstd(a.values)
blcv = scipy.stats.variation(a.values, nan_policy='omit')
r = np.arange(0, 250, 10)
avg = [blavg]
stdev = [blstd]
cv = [blcv]
ind = list(np.arange(0, 250, 10))
ind.insert(0, 'Baseline')
for i in r:
x = b[i:(i+10)].mean()
y = b[i:(i+10)].std()
z = scipy.stats.variation((b[i:(i+10)]).values, nan_policy='omit')
avg.append(x)
stdev.append(y)
cv.append(z)
dftensec = pd.DataFrame({
'a_mean': avg,
'b_std dev' : stdev,
'c_CoV' : cv,
}, index = ind)
return dftensec
In [104]:
PIten = tensec(a['PI'], b['PI'])
PIten.columns = ['PI mean', 'PI std dev', 'PI CV']
PRten = tensec(a['PR'], b['PR'])
PRten.columns = ['PR mean', 'PR std dev', 'PR CV']
SpO2ten = tensec(a['SpO2'], b['SpO2'])
SpO2ten.columns = ['SpO2 mean', 'SpO2 std dev', 'SpO2 CV']
StO2ten = tensec(a['StO2'], b['StO2'])
StO2ten.columns = ['StO2 mean', 'StO2 std dev', 'StO2 CV']
FTOEten = tensec(ea['FTOE'], eb['FTOE']) #e means dropped nan
FTOEten.columns = ['FTOE mean', 'FTOE std dev', 'FTOE CV']
In [105]:
def linearmeasures(avglst):
    #mean, standard deviation, and coefficient of variation
    #for each variable in each 3-hour epoch
    epochs = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24']
    cols = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE']
    stats = dict((c, {'mean': [], 'std': [], 'cv': []}) for c in cols)
    for i in avglst:
        for c in cols:
            vals = i[c].values
            stats[c]['mean'].append(np.nanmean(vals)) #mean
            stats[c]['std'].append(np.nanstd(vals)) #std dev
            stats[c]['cv'].append(scipy.stats.variation(vals, nan_policy='omit')) #CoV
    def lindf(c):
        return pd.DataFrame({
            'a_' + c + ' mean': stats[c]['mean'],
            'b_' + c + ' std dev': stats[c]['std'],
            'c_' + c + ' CoV': stats[c]['cv'],
        }, index=epochs)
    PIlin, PRlin, SpO2lin, StO2lin, FTOElin = [lindf(c) for c in cols]
    dfCoV = pd.DataFrame(dict((c + ' CoV', stats[c]['cv']) for c in cols), index=epochs)
    return PIlin, PRlin, SpO2lin, StO2lin, FTOElin, dfCoV
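# Note: scipy.stats.variation returns the coefficient of variation (std/mean);
# nan_policy='omit' skips NaN samples instead of propagating them.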
In [106]:
PIlin, PRlin, SpO2lin, StO2lin, FTOElin, dfCoV = linearmeasures(dfenlst) #USE dfenlst
In [107]:
from IPython.display import display
display(PIlin)
display(PRlin)
display(SpO2lin)
display(StO2lin)
display(FTOElin)
In [108]:
dfCoV.plot() #CV graph
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [109]:
ar = dfen[tw['tw0']:0].corr()
#.corr() excludes NaN values and uses the Pearson correlation
#coefficient by default (kendall and spearman are alternatives)
br = dfen[0:tw['tw1']].corr()
cr = dfen[tw['tw1']:tw['tw2']].corr()
dr = dfen[tw['tw2']:tw['tw3']].corr()
er = dfen[tw['tw3']:tw['tw4']].corr()
fr = dfen[tw['tw4']:tw['tw5']].corr()
gr = dfen[tw['tw5']:tw['tw6']].corr()
hr = dfen[tw['tw6']:tw['tw7']].corr()
ir = dfen[tw['tw7']:tw['tw8']].corr()
dfcorrlst = [ar, br, cr, dr, er, fr, gr, hr, ir]
In [110]:
# 0 1 2 3 4
# PI, PR, SpO2, StO2, FTOE
# PI aa ab ac ad ae
# PR ba bb bc bd be
# SpO2 ca cb cc cd ce
# StO2 da db dc dd de
# FTOE ea eb ec ed ee
def corrlst(dflst):
    #pull each unique pairwise correlation out of the matrices by label
    #(equivalent to reading the upper triangle in the table above)
    pairs = [('PI','PR'), ('PI','SpO2'), ('PI','StO2'), ('PI','FTOE'),
             ('PR','SpO2'), ('PR','StO2'), ('PR','FTOE'),
             ('SpO2','StO2'), ('SpO2','FTOE'), ('StO2','FTOE')]
    data = {}
    for p, q in pairs:
        data[p + ':' + q] = [x.loc[p, q] for x in dflst]
    rgraph = pd.DataFrame(data, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    return rgraph
In [111]:
rgraph = corrlst(dfcorrlst)
In [112]:
PIg = rgraph[['PI:PR', 'PI:SpO2', 'PI:StO2', 'PI:FTOE']]
PRg = rgraph[['PI:PR', 'PR:FTOE', 'PR:SpO2', 'PR:StO2']]
SpO2g = rgraph[['PI:SpO2', 'PR:SpO2', 'SpO2:StO2', 'SpO2:FTOE']]
StO2g = rgraph[['PI:StO2', 'PR:StO2', 'SpO2:StO2', 'StO2:FTOE']]
FTOEg = rgraph[['PI:FTOE', 'PR:FTOE', 'SpO2:FTOE', 'StO2:FTOE']]
In [113]:
display(PIg, PRg, SpO2g, StO2g, FTOEg)
In [114]:
PIg.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [115]:
PRg.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [116]:
SpO2g.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [117]:
StO2g.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [118]:
FTOEg.plot(grid=True)
plt.hlines(0,0,8,colors='k', lw=5)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
Plots comparing baseline and 3H after ROP Exam
In [119]:
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter.
The Savitzky-Golay filter removes high frequency noise from data.
It has the advantage of preserving the original shape and
features of the signal better than other types of filtering
approaches, such as moving averages techniques.
Parameters
----------
y : array_like, shape (N,)
the values of the time history of the signal.
window_size : int
the length of the window. Must be an odd integer number.
order : int
the order of the polynomial used in the filtering.
Must be less than `window_size` - 1.
deriv: int
the order of the derivative to compute (default = 0 means only smoothing)
Returns
-------
ys : ndarray, shape (N)
the smoothed signal (or its n-th derivative).
Notes
-----
The Savitzky-Golay filter is a type of low-pass filter, particularly
suited for smoothing noisy data. The main idea behind this
approach is to make for each point a least-squares fit with a
polynomial of high order over an odd-sized window centered at
the point.
Examples
--------
t = np.linspace(-4, 4, 500)
y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
ysg = savitzky_golay(y, window_size=31, order=4)
import matplotlib.pyplot as plt
plt.plot(t, y, label='Noisy signal')
plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
plt.plot(t, ysg, 'r', label='Filtered signal')
plt.legend()
plt.show()
References
----------
.. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
Data by Simplified Least Squares Procedures. Analytical
Chemistry, 1964, 36 (8), pp 1627-1639.
.. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
Cambridge University Press ISBN-13: 9780521880688
"""
import numpy as np
from math import factorial
    try:
        window_size = np.abs(np.int(window_size))
        order = np.abs(np.int(order))
    except ValueError as msg:
        raise ValueError("window_size and order have to be of type int")
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomial's order")
order_range = range(order+1)
half_window = (window_size -1) // 2
# precompute coefficients
b = np.mat([[k**i for i in order_range] for k in range(-half_window, half_window+1)])
m = np.linalg.pinv(b).A[deriv] * rate**deriv * factorial(deriv)
# pad the signal at the extremes with
# values taken from the signal itself
firstvals = y[0] - np.abs( y[1:half_window+1][::-1] - y[0] )
lastvals = y[-1] + np.abs(y[-half_window-1:-1][::-1] - y[-1])
y = np.concatenate((firstvals, y, lastvals))
return np.convolve( m[::-1], y, mode='valid')
In [120]:
ypsg = savitzky_golay(dfen['SpO2'].values, 31, order=4, deriv=0, rate=1)
yrsg = savitzky_golay(dfen['PR'].values, 31, order=4, deriv=0, rate=1)
ytsg = savitzky_golay(dfen['StO2'].values, 31, order=4, deriv=0, rate=1)
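# A window of 31 samples spans ~62 s at the nominal 2 s sampling interval
# (assumption); the order-4 fit smooths noise while preserving signal shape.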
yi = dfen['PI'].values #don't filter PI values
yf = dfen['FTOE'].values #don't filter FTOE
dfgraph = pd.DataFrame({'SpO2':ypsg, 'PR':yrsg, 'StO2':ytsg, 'PI':yi, 'FTOE':yf}, index=dfen.index)
In [121]:
fig, axes = plt.subplots(nrows=2, ncols=1)
dfgraph['StO2'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline StO2');
dfgraph['StO2'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam StO2');
plt.tight_layout()
plt.show()
In [122]:
fig, axes = plt.subplots(nrows=2, ncols=1)
dfgraph['SpO2'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline SpO2')
dfgraph['SpO2'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam SpO2');
plt.tight_layout()
plt.show()
In [123]:
fig, axes = plt.subplots(nrows=2, ncols=1)
dfgraph['PR'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline PR');
dfgraph['PR'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam PR');
plt.tight_layout()
plt.show()
In [124]:
fig, axes = plt.subplots(nrows=2, ncols=1)
dfgraph['PI'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline PI');
dfgraph['PI'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam PI');
plt.tight_layout()
plt.show()
In [125]:
fig, axes = plt.subplots(nrows=2, ncols=1)
dfgraph['FTOE'][-10800:0].plot(ax=axes[0]); axes[0].set_title('3h Baseline FTOE');
dfgraph['FTOE'][0:10800].plot(ax=axes[1], color = 'r'); axes[1].set_title('3h After Exam FTOE');
plt.tight_layout()
plt.show()
In [126]:
from entropy import *
#def fuzzyen(x, dim, r, n, scale=True):
# return entropy(x, dim, r, n=n, scale=scale, remove_baseline=True)
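# The parameters below follow common practice for physiologic series:
# embedding dimension m=2 and tolerance r = 0.2*SD of each series.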
def FuzzyEnLst(dfenlstcopy1):
PI_FE = []
PR_FE = []
SpO2_FE = []
StO2_FE = []
FTOE_FE = []
for i in dfenlstcopy1:
a = fuzzyen(i['PI'].values, 2, .2*np.nanstd(i['PI'].values), n=1)
b = fuzzyen(i['PR'].values, 2, .2*np.nanstd(i['PR'].values), n=1)
c = fuzzyen(i['SpO2'].values, 2, .2*np.nanstd(i['SpO2'].values), n=1)
d = fuzzyen(i['StO2'].values, 2, .2*np.nanstd(i['StO2'].values), n=1)
e = fuzzyen(i['FTOE'].values, 2, .2*np.nanstd(i['FTOE'].values), n=1)
PI_FE.append(a)
PR_FE.append(b)
SpO2_FE.append(c)
StO2_FE.append(d)
FTOE_FE.append(e)
FuzzyEnDf = pd.DataFrame({
'PI FuzzyEn': PI_FE,
'PR FuzzyEn' : PR_FE,
'SpO2 FuzzyEn' : SpO2_FE,
'StO2 FuzzyEn' : StO2_FE,
'FTOE FuzzyEn' : FTOE_FE
}, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
return FuzzyEnDf
In [127]:
FuzzyEnDf = FuzzyEnLst(dfenlstcopy1)
In [ ]:
FuzzyEnDf['PI FuzzyEn'].plot(grid=True)
In [ ]:
FuzzyEnDf['PR FuzzyEn'].plot(grid=True)
In [ ]:
FuzzyEnDf['SpO2 FuzzyEn'].plot(grid=True)
In [ ]:
FuzzyEnDf['StO2 FuzzyEn'].plot(grid=True)
In [ ]:
FuzzyEnDf['FTOE FuzzyEn'].plot(grid=True)
In [ ]:
FuzzyEnDf.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
In [ ]:
#def cross_fuzzyen(x1, x2, m, r, n, scale=True):
# return entropy([x1, x2], dim, r, n, scale=scale, remove_baseline=True)
def CrossFuzzyLst(dfenlstcopy2):
    #Cross Fuzzy Entropy between each pair of variables, per epoch,
    #keeping the same pair order as the correlation table above
    pairs = [('PI','PR'), ('PI','SpO2'), ('PI','StO2'), ('PI','FTOE'),
             ('PR','SpO2'), ('PR','StO2'), ('PR','FTOE'),
             ('SpO2','StO2'), ('SpO2','FTOE'), ('StO2','FTOE')]
    data = {}
    for p, q in pairs:
        data['xf ' + p + ':' + q] = [cross_fuzzyen(x[p].values, x[q].values, 2, .2, n=1)
                                     for x in dfenlstcopy2]
    XFuzzyEnDf = pd.DataFrame(data, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
    return XFuzzyEnDf
In [ ]:
xengraph = CrossFuzzyLst(dfenlstcopy2) #error up to 4 decimal places
In [ ]:
PIxg = xengraph[['xf PI:PR', 'xf PI:SpO2', 'xf PI:StO2', 'xf PI:FTOE']]
PRxg = xengraph[['xf PI:PR', 'xf PR:FTOE', 'xf PR:SpO2', 'xf PR:StO2']]
SpO2xg = xengraph[['xf PI:SpO2', 'xf PR:SpO2', 'xf SpO2:StO2', 'xf SpO2:FTOE']]
StO2xg = xengraph[['xf PI:StO2', 'xf PR:StO2', 'xf SpO2:StO2', 'xf StO2:FTOE']]
FTOExg = xengraph[['xf PI:FTOE', 'xf PR:FTOE', 'xf SpO2:FTOE', 'xf StO2:FTOE']]
In [ ]:
display(PIxg, PRxg, SpO2xg, StO2xg, FTOExg)
In [ ]:
PIxg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
PRxg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
SpO2xg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
StO2xg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
FTOExg.plot(grid=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
def OrderDetn(x1, x2):
    #Takes two arrays and orders them by length,
    #shortest first
    #return (smaller x, bigger x)
    if len(x1) <= len(x2):
        return x1, x2
    else:
        return x2, x1
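# Illustrative check (hypothetical arrays): the shorter array comes back first.
short, longer = OrderDetn(np.zeros(3), np.zeros(5))
assert len(short) <= len(longer)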
In [ ]:
#def cross_fuzzyen(x1, x2, m, r, n, scale=True):
# return entropy([x1, x2], dim, r, n, scale=scale, remove_baseline=True)
def CrossFuzzyBaselineLst(dfenlstcopy3):
    #cross fuzzy entropy of the baseline epoch against each time epoch
    #comparison within the same variable
    #cross_fuzzyen requires len(x1) <= len(x2), hence OrderDetn
PIxbl = []
PRxbl = []
SpO2xbl = []
StO2xbl = []
FTOExbl = []
for i in np.arange(0, 9): #9 time epochs
a1, a2 = OrderDetn(dfenlstcopy3[0]['PI'].values, dfenlstcopy3[i]['PI'].values)
b1, b2 = OrderDetn(dfenlstcopy3[0]['PR'].values, dfenlstcopy3[i]['PR'].values)
c1, c2 = OrderDetn(dfenlstcopy3[0]['SpO2'].values, dfenlstcopy3[i]['SpO2'].values)
d1, d2 = OrderDetn(dfenlstcopy3[0]['StO2'].values, dfenlstcopy3[i]['StO2'].values)
e1, e2 = OrderDetn(dfenlstcopy3[0]['FTOE'].values, dfenlstcopy3[i]['FTOE'].values)
a = cross_fuzzyen(a1, a2, 2, .2, n=1)
b = cross_fuzzyen(b1, b2, 2, .2, n=1)
c = cross_fuzzyen(c1, c2, 2, .2, n=1)
d = cross_fuzzyen(d1, d2, 2, .2, n=1)
e = cross_fuzzyen(e1, e2, 2, .2, n=1)
PIxbl.append(a)
PRxbl.append(b)
SpO2xbl.append(c)
StO2xbl.append(d)
FTOExbl.append(e)
XblFuzzyEnDf = pd.DataFrame({
'BL X FuzzyEn PI': PIxbl,
'BL X FuzzyEn PR' : PRxbl,
'BL X FuzzyEn SpO2' : SpO2xbl,
'BL X FuzzyEn StO2' : StO2xbl,
'BL X FuzzyEn FTOE' : FTOExbl,
}, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
return XblFuzzyEnDf
In [ ]:
xblengraph = CrossFuzzyBaselineLst(dfenlstcopy3)
In [ ]:
xblengraph
In [ ]:
xblengraph.plot(grid=True)
plt.title('Cross Fuzzy Entropy', color='black')
plt.tight_layout()
plt.axvline(1, color='y', linestyle=':', lw='4') #vertical line at ROP Exam time
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
# Scripts for chaos-theory-based variables,
# adapted from other authors
#
# #1
from analysis import *
#Source: https://github.com/thelahunginjeet/pydynet/blob/master/analysis.py
#def lz_complexity(s):
#Aboy 2006: binarize the data about a threshold (threshold = median),
#turning the series into a string of 0's and 1's (done in ChaosVars below)
# #2
def LLEcal(data):
#Source: http://systems-sciences.uni-graz.at/etextbook/sw2/lyapunov.html
result = []
lambdas = []
# loop through data
for x in data:
#take the log of the absolute of the data
result.append(np.log(abs(x))) #np.log = ln
# take average
lambdas.append(mean(result))
return max(lambdas)
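# Caveat: this is a rough largest-Lyapunov-exponent estimate from the cited
# tutorial; it reduces to the running mean of ln|x|, not a full phase-space
# reconstruction (e.g., Rosenstein's method).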
# #4
def hurst_mod(data):
#Source: http://pyeeg.sourceforge.net/index.html?highlight=hurst#pyeeg.hurst
#modified the lstsq line to start the fit from the third iterable
X = data
N = len(X)
T = array([float(i) for i in xrange(1,N+1)])
Y = cumsum(X)
Ave_T = Y/T
S_T = zeros((N))
R_T = zeros((N))
for i in xrange(N):
S_T[i] = std(X[:i+1])
X_T = Y - T * Ave_T[i]
R_T[i] = max(X_T[:i + 1]) - min(X_T[:i + 1])
R_S = R_T / S_T
R_S = log(R_S)
n = log(T).reshape(N, 1)
#remove NaNs from both R_S and n
rrr = np.ndarray.flatten(n)
dfxxx = pd.DataFrame({
'n': rrr,
'R_S' : R_S,
})
dfxxx = dfxxx.dropna(how='any')
N = len(dfxxx)
n = dfxxx['n'].values.reshape(N, 1)
R_S = dfxxx['R_S'].values
H = lstsq(n[1:], R_S[1:])[0]
return H[0]
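# Interpretation: H ~ 0.5 suggests an uncorrelated series, H > 0.5 persistent
# (trend-reinforcing) behavior, and H < 0.5 anti-persistent behavior.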
In [ ]:
def ChaosVars(dflst, col): #pass dfenlst and a column name, e.g. 'PI'
    #tau = 1 + n #embedding lag
    # n = m from fnn function, embedding dimension
    fs = 0.5 #sampling frequency (Hz): one sample every 2 s
LZlst = []
LLElst = []
Hlst = []
for i in dflst:
data = i[col].values
datalz = []
thresh = np.nanmedian(data)
        for v in data:
            if v < thresh:
                datalz.append(0)
            else:
                datalz.append(1)
strlz = ''.join(str(x) for x in datalz)
a = ((1.0*lz_complexity(strlz))/random_lz_complexity(len(strlz),p=0.5)) #normalize
LZlst.append(a)
b = LLEcal(data)
LLElst.append(b)
try:
d = hurst_mod(data)
Hlst.append(d)
except ValueError:
Hlst.append(np.nan)
dfchaos = pd.DataFrame({
col + ' LZ': LZlst,
col + ' LLE' : LLElst,
col + ' H' : Hlst,
}, index = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'])
return dfchaos
In [ ]:
dfcPI = ChaosVars(dfenlst, 'PI')
dfcPR = ChaosVars(dfenlst, 'PR')
dfcSpO2 = ChaosVars(dfenlst, 'SpO2')
dfcStO2 = ChaosVars(dfenlst, 'StO2')
dfcFTOE = ChaosVars(dfenlst, 'FTOE')
In [ ]:
display(dfcPI, dfcPR, dfcSpO2, dfcStO2, dfcFTOE)