For pulse-oximetry (Pulse Ox) analysis, make sure the data file is in the expected .csv format:
a) The column headings are on Row 1.
b) Open the .csv file in Notepad or TextEdit and delete the extra
row commas (non-printable characters).
c) Column A always contains the date and Column B the time.
d) There might be a row that says "Time Gap Present". Delete this row in Notepad
or TextEdit.
In [40]:
#the usual beginning
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from datetime import datetime, timedelta
from pandas import concat
def readD(val):
    """Converter for ``read_csv``: turn any value containing 'C' into NaN.

    Parameters
    ----------
    val : object
        Raw cell value. ``read_csv`` converters normally receive strings.

    Returns
    -------
    object
        ``np.nan`` when ``val`` contains an upper-case 'C', otherwise
        ``val`` unchanged.
    """
    try:
        flagged = 'C' in val
    except TypeError:
        # Values that do not support ``in`` (None, numbers) cannot carry
        # the flag; pass them through instead of crashing the import.
        flagged = False
    if flagged:
        return np.nan
    return val
In [41]:
# Load the pulse-oximeter export into a DataFrame indexed by timestamp.
#   parse_dates: combine the 'Date' and 'Time' columns into a single column
#                named 'timestamp' and parse it as a datetime.
#   index_col:   use that 'timestamp' column as the index.
#   usecols:     read only the listed columns from the file.
#   na_values:   treat the text '0' as missing (NaN).
#   converters:  run every 'Exceptions' cell through readD, which returns NaN
#                for any value containing 'C'; rows with a missing PI or
#                Exceptions value are dropped later when df_clean is built.
df = pd.read_csv('/Users/John/Dropbox/LLU/ROP/Pulse Ox/ROP006PO.csv',
parse_dates={'timestamp': ['Date','Time']},
index_col='timestamp',
usecols=['Date', 'Time', 'SpO2', 'PR', 'PI', 'Exceptions'],
na_values=['0'],
converters={'Exceptions': readD}
)
In [42]:
# Rows usable for perfusion-index (PI) statistics: drop every row in which
# either 'PI' or 'Exceptions' is missing.
df_clean = df.dropna(subset=['PI', 'Exceptions'], how='any')
In [43]:
"""Pulse ox date/time is 1 mins and 32 seconds faster than phone. Have to correct for it."""
# TC is the clock offset between the two devices. It is added to every event
# time entered by hand (W, X, Z) so that those times line up with the
# timestamps in the pulse-oximeter data.
TC = timedelta(minutes=1, seconds=32)
In [44]:
# Recording boundaries, taken from the data itself.
df_first = df.first_valid_index()   # first valid index label of df
df_last = df.last_valid_index()     # last valid index label of df
# Y = time data collection began (first data point in the CSV)
Y = pd.to_datetime(df_first)
# Q = time data collection ended (last data point in the CSV)
Q = pd.to_datetime(df_last)

# Event times entered by hand; TC is added to each of them.
# Syntax: datetime(year, month, day[, hour[, minute[, second[, microsecond[, tzinfo]]]]])
# W = first eye drop starts
W = datetime(year=2015, month=6, day=17, hour=7, minute=10) + TC
# X = ROP exam started
X = datetime(year=2015, month=6, day=17, hour=8, minute=36) + TC
# Z = ROP exam ended
Z = datetime(year=2015, month=6, day=17, hour=8, minute=39) + TC
In [45]:
# Baseline: mean of each signal from the first data point (Y) up to the first
# eye drop (W). PI is taken from df_clean (rows with missing PI/Exceptions
# removed); SpO2 and PR come from the full frame.
# NOTE(review): label slices on a datetime index include both end points, so a
# sample stamped exactly W is part of this baseline -- confirm that is intended.
avg0PI = df_clean.PI[Y:W].mean()
avg0O2 = df.SpO2[Y:W].mean()
avg0PR = df.PR[Y:W].mean()
# Python 2 print statement; the trailing comma suppresses the final newline.
print 'Baseline Averages\n', 'PI :\t',avg0PI, '\nSpO2 :\t',avg0O2,'\nPR :\t',avg0PR,
# Use df.std() if the standard deviation is needed as well.
In [46]:
# Every 5 min Average from start of eye drops to start of exam
def perdeltadrop(start, end, delta):
    """Return the window start points ``start, start + delta, ...`` below ``end``.

    Parameters
    ----------
    start, end : comparable values (datetimes in this notebook)
        Half-open range ``[start, end)`` to cover.
    delta : step added repeatedly to ``start`` (a timedelta in this notebook)

    Returns
    -------
    list
        Every ``start + k * delta`` that is strictly less than ``end``;
        empty when ``start >= end``.

    Raises
    ------
    ValueError
        If the range is non-empty but ``delta`` does not move forward
        (zero or negative); the loop would otherwise never terminate.
    """
    rdrop = []
    curr = start
    while curr < end:
        rdrop.append(curr)
        nxt = curr + delta
        if not nxt > curr:
            raise ValueError("delta must be positive, got %r" % (delta,))
        curr = nxt
    return rdrop
# Five-minute averages over the hour that follows the first eye drop (W).
dfdropPI = df_clean.PI[W:W + timedelta(hours=1)]
dfdropO2 = df.SpO2[W:W + timedelta(hours=1)]
dfdropPR = df.PR[W:W + timedelta(hours=1)]
windrop = timedelta(minutes=5)  # width of each averaging window
rdrop = perdeltadrop(W, W + timedelta(hours=1), windrop)  # window start times
# One slot per window. The explicit float dtype avoids relying on the
# constructor's default for data-less Series, which differs across pandas
# versions.
avgdropPI = Series(index=rdrop, name='PI DurEyeD', dtype=float)
avgdropO2 = Series(index=rdrop, name='SpO2 DurEyeD', dtype=float)
avgdropPR = Series(index=rdrop, name='PR DurEyeD', dtype=float)
for i in rdrop:
    # Half-open window [i, i + windrop). A label slice i:(i + windrop) would
    # include both end points, so a sample stamped exactly on a boundary
    # would be averaged into two neighbouring windows.
    nxt = i + windrop
    avgdropPI[i] = dfdropPI[(dfdropPI.index >= i) & (dfdropPI.index < nxt)].mean()
    avgdropO2[i] = dfdropO2[(dfdropO2.index >= i) & (dfdropO2.index < nxt)].mean()
    avgdropPR[i] = dfdropPR[(dfdropPR.index >= i) & (dfdropPR.index < nxt)].mean()
resultdrops = concat([avgdropPI, avgdropO2, avgdropPR], axis=1, join='inner')
print(resultdrops)  # single-argument form prints the same on Python 2 and 3
In [47]:
#AVERAGE DURING ROP EXAM FOR FIRST FOUR MINUTES
def perdelta1(start, end, delta):
    """Return the window start points ``start, start + delta, ...`` below ``end``.

    Parameters
    ----------
    start, end : comparable values (datetimes in this notebook)
        Half-open range ``[start, end)`` to cover.
    delta : step added repeatedly to ``start`` (a timedelta in this notebook)

    Returns
    -------
    list
        Every ``start + k * delta`` that is strictly less than ``end``;
        empty when ``start >= end``.

    Raises
    ------
    ValueError
        If the range is non-empty but ``delta`` does not move forward
        (zero or negative); the loop would otherwise never terminate.
    """
    r1 = []
    curr = start
    while curr < end:
        r1.append(curr)
        nxt = curr + delta
        if not nxt > curr:
            raise ValueError("delta must be positive, got %r" % (delta,))
        curr = nxt
    return r1
# Ten-second averages over the first four minutes of the ROP exam (from X).
df1PI = df_clean.PI[X:X + timedelta(minutes=4)]
df1O2 = df.SpO2[X:X + timedelta(minutes=4)]
df1PR = df.PR[X:X + timedelta(minutes=4)]
win1 = timedelta(seconds=10)  # width of each averaging window (any unit of time)
r1 = perdelta1(X, X + timedelta(minutes=4), win1)  # window start times
# One slot per window. The explicit float dtype avoids relying on the
# constructor's default for data-less Series, which differs across pandas
# versions.
avg1PI = Series(index=r1, name='PI DurEx', dtype=float)
avg1O2 = Series(index=r1, name='SpO2 DurEx', dtype=float)
avg1PR = Series(index=r1, name='PR DurEX', dtype=float)
for i1 in r1:
    # Half-open window [i1, i1 + win1). A label slice i1:(i1 + win1) would
    # include both end points, so a sample stamped exactly on a boundary
    # would be averaged into two neighbouring windows.
    nxt1 = i1 + win1
    avg1PI[i1] = df1PI[(df1PI.index >= i1) & (df1PI.index < nxt1)].mean()
    avg1O2[i1] = df1O2[(df1O2.index >= i1) & (df1O2.index < nxt1)].mean()
    avg1PR[i1] = df1PR[(df1PR.index >= i1) & (df1PR.index < nxt1)].mean()
result1 = concat([avg1PI, avg1O2, avg1PR], axis=1, join='inner')
print(result1)  # single-argument form prints the same on Python 2 and 3
In [48]:
#AVERAGE EVERY 5 MINUTES ONE HOUR AFTER ROP EXAM
def perdelta2(start, end, delta):
    """Return the window start points ``start, start + delta, ...`` below ``end``.

    Parameters
    ----------
    start, end : comparable values (datetimes in this notebook)
        Half-open range ``[start, end)`` to cover.
    delta : step added repeatedly to ``start`` (a timedelta in this notebook)

    Returns
    -------
    list
        Every ``start + k * delta`` that is strictly less than ``end``;
        empty when ``start >= end``.

    Raises
    ------
    ValueError
        If the range is non-empty but ``delta`` does not move forward
        (zero or negative); the loop would otherwise never terminate.
    """
    r2 = []
    curr = start
    while curr < end:
        r2.append(curr)
        nxt = curr + delta
        if not nxt > curr:
            raise ValueError("delta must be positive, got %r" % (delta,))
        curr = nxt
    return r2
# Five-minute averages over the first hour after the ROP exam ended (Z).
df2PI = df_clean.PI[Z:(Z + timedelta(hours=1))]
df2O2 = df.SpO2[Z:(Z + timedelta(hours=1))]
df2PR = df.PR[Z:(Z + timedelta(hours=1))]
win2 = timedelta(minutes=5)  # width of each averaging window (any unit of time)
r2 = perdelta2(Z, (Z + timedelta(hours=1)), win2)  # window start times
# One slot per window. The explicit float dtype avoids relying on the
# constructor's default for data-less Series, which differs across pandas
# versions.
avg2PI = Series(index=r2, name='PI q5MinHr1', dtype=float)
avg2O2 = Series(index=r2, name='O2 q5MinHr1', dtype=float)
avg2PR = Series(index=r2, name='PR q5MinHr1', dtype=float)
for i2 in r2:
    # Half-open window [i2, i2 + win2). A label slice i2:(i2 + win2) would
    # include both end points, so a sample stamped exactly on a boundary
    # would be averaged into two neighbouring windows.
    nxt2 = i2 + win2
    avg2PI[i2] = df2PI[(df2PI.index >= i2) & (df2PI.index < nxt2)].mean()
    avg2O2[i2] = df2O2[(df2O2.index >= i2) & (df2O2.index < nxt2)].mean()
    avg2PR[i2] = df2PR[(df2PR.index >= i2) & (df2PR.index < nxt2)].mean()
result2 = concat([avg2PI, avg2O2, avg2PR], axis=1, join='inner')
print(result2)  # single-argument form prints the same on Python 2 and 3
In [49]:
#AVERAGE EVERY 15 MINUTES TWO HOURS AFTER ROP EXAM
def perdelta3(start, end, delta):
    """Return the window start points ``start, start + delta, ...`` below ``end``.

    Parameters
    ----------
    start, end : comparable values (datetimes in this notebook)
        Half-open range ``[start, end)`` to cover.
    delta : step added repeatedly to ``start`` (a timedelta in this notebook)

    Returns
    -------
    list
        Every ``start + k * delta`` that is strictly less than ``end``;
        empty when ``start >= end``.

    Raises
    ------
    ValueError
        If the range is non-empty but ``delta`` does not move forward
        (zero or negative); the loop would otherwise never terminate.
    """
    r3 = []
    curr = start
    while curr < end:
        r3.append(curr)
        nxt = curr + delta
        if not nxt > curr:
            raise ValueError("delta must be positive, got %r" % (delta,))
        curr = nxt
    return r3
# Fifteen-minute averages over the second hour after the ROP exam ended
# (Z + 1 h up to Z + 2 h).
df3PI = df_clean.PI[(Z + timedelta(hours=1)):(Z + timedelta(hours=2))]
df3O2 = df.SpO2[(Z + timedelta(hours=1)):(Z + timedelta(hours=2))]
df3PR = df.PR[(Z + timedelta(hours=1)):(Z + timedelta(hours=2))]
win3 = timedelta(minutes=15)  # width of each averaging window (any unit of time)
r3 = perdelta3((Z + timedelta(hours=1)), (Z + timedelta(hours=2)), win3)  # window start times
# One slot per window. The explicit float dtype avoids relying on the
# constructor's default for data-less Series, which differs across pandas
# versions.
avg3PI = Series(index=r3, name='PI q15MinHr2', dtype=float)
avg3O2 = Series(index=r3, name='O2 q15MinHr2', dtype=float)
avg3PR = Series(index=r3, name='PR q15MinHr2', dtype=float)
for i3 in r3:
    # Half-open window [i3, i3 + win3). A label slice i3:(i3 + win3) would
    # include both end points, so a sample stamped exactly on a boundary
    # would be averaged into two neighbouring windows.
    nxt3 = i3 + win3
    avg3PI[i3] = df3PI[(df3PI.index >= i3) & (df3PI.index < nxt3)].mean()
    avg3O2[i3] = df3O2[(df3O2.index >= i3) & (df3O2.index < nxt3)].mean()
    avg3PR[i3] = df3PR[(df3PR.index >= i3) & (df3PR.index < nxt3)].mean()
result3 = concat([avg3PI, avg3O2, avg3PR], axis=1, join='inner')
print(result3)  # single-argument form prints the same on Python 2 and 3
In [50]:
#AVERAGE EVERY 30 MINUTES THREE HOURS AFTER ROP EXAM
def perdelta4(start, end, delta):
    """Return the window start points ``start, start + delta, ...`` below ``end``.

    Parameters
    ----------
    start, end : comparable values (datetimes in this notebook)
        Half-open range ``[start, end)`` to cover.
    delta : step added repeatedly to ``start`` (a timedelta in this notebook)

    Returns
    -------
    list
        Every ``start + k * delta`` that is strictly less than ``end``;
        empty when ``start >= end``.

    Raises
    ------
    ValueError
        If the range is non-empty but ``delta`` does not move forward
        (zero or negative); the loop would otherwise never terminate.
    """
    r4 = []
    curr = start
    while curr < end:
        r4.append(curr)
        nxt = curr + delta
        if not nxt > curr:
            raise ValueError("delta must be positive, got %r" % (delta,))
        curr = nxt
    return r4
# Thirty-minute averages over the third hour after the ROP exam ended
# (Z + 2 h up to Z + 3 h).
df4PI = df_clean.PI[(Z + timedelta(hours=2)):(Z + timedelta(hours=3))]
df4O2 = df.SpO2[(Z + timedelta(hours=2)):(Z + timedelta(hours=3))]
df4PR = df.PR[(Z + timedelta(hours=2)):(Z + timedelta(hours=3))]
win4 = timedelta(minutes=30)  # width of each averaging window (any unit of time)
r4 = perdelta4((Z + timedelta(hours=2)), (Z + timedelta(hours=3)), win4)  # window start times
# One slot per window. The explicit float dtype avoids relying on the
# constructor's default for data-less Series, which differs across pandas
# versions.
avg4PI = Series(index=r4, name='PI q30MinHr3', dtype=float)
avg4O2 = Series(index=r4, name='O2 q30MinHr3', dtype=float)
avg4PR = Series(index=r4, name='PR q30MinHr3', dtype=float)
for i4 in r4:
    # Half-open window [i4, i4 + win4). A label slice i4:(i4 + win4) would
    # include both end points, so a sample stamped exactly on a boundary
    # would be averaged into two neighbouring windows.
    nxt4 = i4 + win4
    avg4PI[i4] = df4PI[(df4PI.index >= i4) & (df4PI.index < nxt4)].mean()
    avg4O2[i4] = df4O2[(df4O2.index >= i4) & (df4O2.index < nxt4)].mean()
    avg4PR[i4] = df4PR[(df4PR.index >= i4) & (df4PR.index < nxt4)].mean()
result4 = concat([avg4PI, avg4O2, avg4PR], axis=1, join='inner')
print(result4)  # single-argument form prints the same on Python 2 and 3
In [51]:
#AVERAGE EVERY 60 MINUTES 4-24 HOURS AFTER ROP EXAM
def perdelta5(start, end, delta):
    """Return the window start points ``start, start + delta, ...`` below ``end``.

    Parameters
    ----------
    start, end : comparable values (datetimes in this notebook)
        Half-open range ``[start, end)`` to cover.
    delta : step added repeatedly to ``start`` (a timedelta in this notebook)

    Returns
    -------
    list
        Every ``start + k * delta`` that is strictly less than ``end``;
        empty when ``start >= end``.

    Raises
    ------
    ValueError
        If the range is non-empty but ``delta`` does not move forward
        (zero or negative); the loop would otherwise never terminate.
    """
    r5 = []
    curr = start
    while curr < end:
        r5.append(curr)
        nxt = curr + delta
        if not nxt > curr:
            raise ValueError("delta must be positive, got %r" % (delta,))
        curr = nxt
    return r5
# Sixty-minute averages from three hours to twenty-four hours after the ROP
# exam ended (Z + 3 h up to Z + 24 h).
df5PI = df_clean.PI[(Z + timedelta(hours=3)):(Z + timedelta(hours=24))]
df5O2 = df.SpO2[(Z + timedelta(hours=3)):(Z + timedelta(hours=24))]
df5PR = df.PR[(Z + timedelta(hours=3)):(Z + timedelta(hours=24))]
win5 = timedelta(minutes=60)  # width of each averaging window (any unit of time)
r5 = perdelta5((Z + timedelta(hours=3)), (Z + timedelta(hours=24)), win5)  # window start times
# One slot per window. The explicit float dtype avoids relying on the
# constructor's default for data-less Series, which differs across pandas
# versions.
avg5PI = Series(index=r5, name='PI q60MinHr4+', dtype=float)
avg5O2 = Series(index=r5, name='O2 q60MinHr4+', dtype=float)
avg5PR = Series(index=r5, name='PR q60MinHr4+', dtype=float)
for i5 in r5:
    # Half-open window [i5, i5 + win5). A label slice i5:(i5 + win5) would
    # include both end points, so a sample stamped exactly on a boundary
    # would be averaged into two neighbouring windows.
    nxt5 = i5 + win5
    avg5PI[i5] = df5PI[(df5PI.index >= i5) & (df5PI.index < nxt5)].mean()
    avg5O2[i5] = df5O2[(df5O2.index >= i5) & (df5O2.index < nxt5)].mean()
    avg5PR[i5] = df5PR[(df5PR.index >= i5) & (df5PR.index < nxt5)].mean()
result5 = concat([avg5PI, avg5O2, avg5PR], axis=1, join='inner')
print(result5)  # single-argument form prints the same on Python 2 and 3
In [52]:
def _desat_runs(values, min_len=5):
    """Split SpO2 readings into runs of consecutive same-band desaturation.

    Bands: 'below' (v <= 80), 'middle' (81 <= v <= 84), 'above'
    (85 <= v <= 89). Any other reading (90 or higher, missing/NaN, or a
    value between bands) belongs to no band and ends the current run.

    Parameters
    ----------
    values : iterable of numbers
        Readings in time order.
    min_len : int
        Minimum number of consecutive readings for a run to count
        (5 readings, given one data point every 2 seconds).

    Returns
    -------
    dict
        ``{'below': [...], 'middle': [...], 'above': [...]}`` where each
        entry lists the qualifying runs (lists of readings) in order.
    """
    def _band(v):
        if v <= 80:
            return 'below'
        if 81 <= v <= 84:
            return 'middle'
        if 85 <= v <= 89:
            return 'above'
        return None  # comparisons with NaN are all False, so NaN lands here

    runs = {'below': [], 'middle': [], 'above': []}
    current, current_band = [], None
    for v in values:
        band = _band(v)
        if band != current_band:
            # The run just ended. Record it however it ended: a run that
            # ends with recovery (no band) counts the same as one that is
            # followed by a different desaturation band.
            if current_band is not None and len(current) >= min_len:
                runs[current_band].append(current)
            current, current_band = [], band
        if band is not None:
            current.append(v)
    # A run still open at the end of the data counts as well.
    if current_band is not None and len(current) >= min_len:
        runs[current_band].append(current)
    return runs


# Pre-exam desaturation analysis: first data point (Y) up to the first eye drop (W).
df_O2_pre = df[Y:W]
_pre_runs = _desat_runs(df_O2_pre['SpO2'])
# Runs keyed 0, 1, 2, ... in order of occurrence.
b_dict = dict(enumerate(_pre_runs['below']))    # v <= 80
m_dict = dict(enumerate(_pre_runs['middle']))   # 81 <= v <= 84
a_dict = dict(enumerate(_pre_runs['above']))    # 85 <= v <= 89
# Number of qualifying runs per band.
below = len(b_dict)
middle = len(m_dict)
above = len(a_dict)
# Total number of readings spent in qualifying runs, per band (as floats).
b_len = float(sum(len(run) for run in b_dict.values()))
m_len = float(sum(len(run) for run in m_dict.values()))
a_len = float(sum(len(run) for run in a_dict.values()))
In [53]:
def _desat_runs(values, min_len=5):
    """Split SpO2 readings into runs of consecutive same-band desaturation.

    Bands: 'below' (v <= 80), 'middle' (81 <= v <= 84), 'above'
    (85 <= v <= 89). Any other reading (90 or higher, missing/NaN, or a
    value between bands) belongs to no band and ends the current run.

    Parameters
    ----------
    values : iterable of numbers
        Readings in time order.
    min_len : int
        Minimum number of consecutive readings for a run to count
        (5 readings, given one data point every 2 seconds).

    Returns
    -------
    dict
        ``{'below': [...], 'middle': [...], 'above': [...]}`` where each
        entry lists the qualifying runs (lists of readings) in order.
    """
    def _band(v):
        if v <= 80:
            return 'below'
        if 81 <= v <= 84:
            return 'middle'
        if 85 <= v <= 89:
            return 'above'
        return None  # comparisons with NaN are all False, so NaN lands here

    runs = {'below': [], 'middle': [], 'above': []}
    current, current_band = [], None
    for v in values:
        band = _band(v)
        if band != current_band:
            # The run just ended. Record it however it ended: a run that
            # ends with recovery (no band) counts the same as one that is
            # followed by a different desaturation band.
            if current_band is not None and len(current) >= min_len:
                runs[current_band].append(current)
            current, current_band = [], band
        if band is not None:
            current.append(v)
    # A run still open at the end of the data counts as well.
    if current_band is not None and len(current) >= min_len:
        runs[current_band].append(current)
    return runs


# Post-exam desaturation analysis: end of the exam (Z) up to the last data point (Q).
df_O2_post = df[Z:Q]
_post_runs = _desat_runs(df_O2_post['SpO2'])
# Runs keyed 0, 1, 2, ... in order of occurrence.
b_dict2 = dict(enumerate(_post_runs['below']))    # v <= 80
m_dict2 = dict(enumerate(_post_runs['middle']))   # 81 <= v <= 84
a_dict2 = dict(enumerate(_post_runs['above']))    # 85 <= v <= 89
# Number of qualifying runs per band.
below2 = len(b_dict2)
middle2 = len(m_dict2)
above2 = len(a_dict2)
# Total number of readings spent in qualifying runs, per band (as floats).
b_len2 = float(sum(len(run) for run in b_dict2.values()))
m_len2 = float(sum(len(run) for run in m_dict2.values()))
a_len2 = float(sum(len(run) for run in a_dict2.values()))
In [54]:
def _run_minutes(n_readings):
    """Minutes covered by ``n_readings`` readings (one reading every 2 seconds)."""
    return (n_readings * 2) / 60.


# Desaturation counts and durations. Each line is built as one string so the
# single-argument print(...) form emits exactly what the multi-item Python 2
# print statement did (no space is inserted after an item ending in a tab).
print("Desat Counts for X mins\n")
print("Pre Mild Desat (85-89) Count: %s\t" % above + "for %s min" % _run_minutes(a_len))
print("Pre Mod Desat (81-84) Count: %s\t" % middle + "for %s min" % _run_minutes(m_len))
print("Pre Sev Desat (=< 80) Count: %s\t" % below + "for %s min\n" % _run_minutes(b_len))
print("Post Mild Desat (85-89) Count: %s\t" % above2 + "for %s min" % _run_minutes(a_len2))
print("Post Mod Desat (81-84) Count: %s\t" % middle2 + "for %s min" % _run_minutes(m_len2))
print("Post Sev Desat (=< 80) Count: %s\t" % below2 + "for %s min\n" % _run_minutes(b_len2))

# Recording lengths.
print("Data Recording Time!")
print('*' * 10)
print("Pre-Exam Data Recording Length\t%s" % (X - Y,))   # start of exam - first data point
print("Post-Exam Data Recording Length\t%s" % (Q - Z,))  # last data point - end of exam
print("Total Data Recording Length\t%s" % (Q - Y,))      # last data point - first data point

# Rows for the export file: recording lengths ...
Pre = ['Pre', X - Y]
Post = ['Post', Q - Z]
Total = ['Total', Q - Y]
RTL = [Pre, Post, Total]

# ... and desaturation count / duration rows, before and after the exam.
PreMild = ['Pre Mild Desats \t', above, 'for', _run_minutes(a_len), 'mins']
PreMod = ['Pre Mod Desats \t', middle, 'for', _run_minutes(m_len), 'mins']
PreSev = ['Pre Sev Desats \t', below, 'for', _run_minutes(b_len), 'mins']
PreDesats = [PreMild, PreMod, PreSev]

PostMild = ['Post Mild Desats \t', above2, 'for', _run_minutes(a_len2), 'mins']
PostMod = ['Post Mod Desats \t', middle2, 'for', _run_minutes(m_len2), 'mins']
PostSev = ['Post Sev Desats \t', below2, 'for', _run_minutes(b_len2), 'mins']
PostDesats = [PostMild, PostMod, PostSev]
In [55]:
# Sanity check for the pre-exam run dictionaries: every reading in a run must
# lie inside that run's band. The check is disabled by wrapping it in a
# string; remove the ''' markers to run it.
# b_dict holds the severe runs (<= 80), m_dict the moderate runs (81-84) and
# a_dict the mild runs (85-89), so each heading names the band it tests.
'''
print "Severe check"
for key, val in b_dict.iteritems():
    print all(i <= 80 for i in val)
print "Moderate check"
for key, val in m_dict.iteritems():
    print all(i >= 81 and i <= 84 for i in val)
print "Mild check"
for key, val in a_dict.iteritems():
    print all(i >= 85 and i <= 89 for i in val)
'''
Out[55]:
In [56]:
import csv


class excel_tab(csv.excel):
    """The csv module's Excel dialect with a tab as the field delimiter."""
    delimiter = '\t'


csv.register_dialect("excel_tab", excel_tab)

# Window start times for each phase, in the order the phases are written out.
_phase_starts = [rdrop, r1, r2, r3, r4, r5]
# (heading, baseline average, per-phase average Series) for each signal.
_signals = [
    ('PI', avg0PI, [avgdropPI, avg1PI, avg2PI, avg3PI, avg4PI, avg5PI]),
    ('O2', avg0O2, [avgdropO2, avg1O2, avg2O2, avg3O2, avg4O2, avg5O2]),
    ('PR', avg0PR, [avgdropPR, avg1PR, avg2PR, avg3PR, avg4PR, avg5PR]),
]

with open('ROP006_PO.csv', 'w') as f:  #CHANGE CSV FILE NAME
    writer = csv.writer(f, dialect=excel_tab)
    for _heading, _baseline, _phase_avgs in _signals:
        writer.writerow([_heading])
        writer.writerow([_baseline])
        for _starts, _avgs in zip(_phase_starts, _phase_avgs):
            for i in _starts:
                # writerow needs a sequence, hence the one-element list
                writer.writerow([_avgs[i]])
    # Summary sections: one heading row followed by the prepared rows.
    for _title, _rows in [
        ('Data Recording Time Length', RTL),
        ('Pre Desat Counts for X Minutes', PreDesats),
        ('Post Dest Counts for X Minutes', PostDesats),
    ]:
        writer.writerow([_title])
        writer.writerows(_rows)