In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from pylab import *
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from scipy import stats
import seaborn.apionly as sns
from tabulate import tabulate
%matplotlib inline
#plt.rcParams['figure.figsize'] = (20.0, 10.0) #set figure size
In [2]:
varlst = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE'] #list of variables
twind = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'] #time window index
ind = list(np.arange(0, 250, 10)) #define index for 240
ind2= list(np.arange(0, 310, 10)) #define index for 5 mins
ind.insert(0, 'Baseline')
ind2.insert(0, 'Baseline')
In [3]:
import ROPtimes as rt
In [4]:
Babylst = ['ROP003', 'ROP005', 'ROP006', 'ROP007', 'ROP008', 'ROP009', 'ROP010',
'ROP011', 'ROP012', 'ROP013', 'ROP014', 'ROP015', 'ROP017', 'ROP018',
'ROP019', 'ROP020', 'ROP021', 'ROP022', 'ROP023', 'ROP025']
#update this as you go
In [5]:
def saveplot(filename):
plt.savefig('Summary Data/plots/'+filename)
def savecsv(filename, df):
df.to_csv('Summary Data/csv/'+filename+'.csv')
In [6]:
#d = {key: value for (key, value) in iterable}
In [7]:
def loadeyedrop(var, eyedrop, graph=False):
Babydflst = []
for Baby in Babylst:
try:
Babyframe = pd.DataFrame()
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_'+eyedrop+'_mean.csv', usecols=[var+' mean'],
index_col=None, header=0)
colname = Baby + ' ' + var + ' mean'
df.columns=[colname]
Babydflst.append(df)
BabyFrame = pd.concat(Babydflst, axis=1)
BabyFrame = BabyFrame.transpose()
BabyFrame.columns = ind2
if var == 'PI':
mask = np.greater(BabyFrame, 5)
BabyFrame[mask] = np.nan
except IOError:
pass
if graph == True:
BabyFrame.boxplot(return_type='axes')
plt.title(var + ' ' +eyedrop + ' Q Ten Seconds Eye Drops')
plt.show()
saveplot(var+'_'+eyedrop+'_summary_tensec')
#display(BabyFrame)
savecsv(var+'_'+eyedrop+'_summary_tensec', BabyFrame)
return BabyFrame
In [8]:
def loadtensec(var, graph=False):
Babydflst = []
for Baby in Babylst:
try:
Babyframe = pd.DataFrame()
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_mean_stddev_10sec.csv', usecols=[var+' mean'],
index_col=None, header=0)
colname = Baby + ' ' + var + ' mean'
df.columns=[colname]
Babydflst.append(df)
BabyFrame = pd.concat(Babydflst, axis=1)
BabyFrame = BabyFrame.transpose()
BabyFrame.columns = ind
if var == 'PI':
mask = np.greater(BabyFrame, 5)
BabyFrame[mask] = np.nan
except IOError:
pass
if graph == True:
BabyFrame.boxplot(return_type='axes')
plt.title(var + ' Q Ten Seconds From Start of ROP Exam')
plt.show()
saveplot(var+'_summary_tensec')
#display(BabyFrame)
#savecsv(var+'_summary_tensec', BabyFrame)
return BabyFrame
In [9]:
def xform_to_twind(dflst):
df = pd.concat(dflst, axis=1)
df = df.transpose()
df.columns = twind
return df
def twind_boxplot(df, var, label):
df.boxplot(return_type='axes')
plt.title(var + label)
plt.show()
return
In [10]:
def load24hrs(var, graph=False):
Babydflst = []
for Baby in Babylst:
try:
Babyframe = pd.DataFrame()
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_mean_stddev_24hrs.csv', usecols=[var+' mean'],
index_col=None, header=0)
df.columns=[Baby+' '+var+' mean']
Babydflst.append(df)
BabyFrame = xform_to_twind(Babydflst)
if var == 'PI':
mask = np.greater(BabyFrame, 5)
BabyFrame[mask] = np.nan
except IOError:
pass
if graph == True:
twind_boxplot(BabyFrame, var, ' for 24 hours')
#saveplot(var+'_24hrs_summ')
#display(BabyFrame)
#savecsv(var+'_24hrs_summ', BabyFrame)
return BabyFrame
In [11]:
def loadfuzzyen(var, graph=False):
Babydflst = []
for Baby in Babylst:
try:
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_FuzzyEn.csv', usecols=[1])
df.columns = [Baby + ' ' + var + ' FuzzyEn']
Babydflst.append(df)
BabyFrame = xform_to_twind(Babydflst)
if var == 'PI':
mask = np.greater(BabyFrame, 5)
BabyFrame[mask] = np.nan
except IOError:
pass
if graph == True:
twind_boxplot(BabyFrameFin, var, ' Fuzzy Entropy for 24 hours')
#saveplot(var+'_fuzzyen_summ')
#display(BabyFrame)
#savecsv(var+'_fuzzyen_summ', BabyFrame)
return BabyFrame
In [12]:
def loadLZcomp(var, graph=False):
Babydflst = []
for Baby in Babylst:
try:
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_LZComp.csv', usecols=[1])
Babydflst.append(df)
df.columns=[Baby+' '+var+' LZ Comp']
BabyFrame = xform_to_twind(Babydflst)
except IOError:
pass
if graph == True:
twind_boxplot(BabyFrame, var, ' Lempel-Ziv Complexity for 24 hours')
#saveplot(var+'_LZ_summ')
#display(BabyFrame)
#savecsv(var+'_LZ_summ', BabyFrame)
return BabyFrame
In [13]:
def loadLLE(var, graph=False):
Babydflst = []
for Baby in Babylst:
try:
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_LLE.csv', usecols=[1])
df.columns=[Baby+' '+var+' LLE']
Babydflst.append(df)
BabyFrame = xform_to_twind(Babydflst)
except IOError:
pass
if graph==True:
twind_boxplot(BabyFrame, var, ' Largest Lyapunov Exponent for 24 hours')
#saveplot(var+'_LLE_summ')
#display(BabyFrameFin)
#savecsv(var+'_LLE_summ', BabyFrame)
return BabyFrame
In [14]:
def loadHurst(var, graph=False):
Babydflst = []
for Baby in Babylst:
try:
df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
Baby+var+'_Hurst.csv', usecols=[1])
df.columns=[Baby+' '+var+' Hurst']
Babydflst.append(df)
BabyFrame = xform_to_twind(Babydflst)
except IOError:
pass
if graph==True:
twind_boxplot(BabyFrameFin, var, ' Hurst Exponent for 24 hours')
#saveplot(var+'_Hurst_summ')
#display(BabyFrameFin)
#savecsv(var+'_Hurst_summ', BabyFrameFin)
return BabyFrame
In [15]:
def loadall(var):
vardct = {
'ed1' : loadeyedrop(var, 'EyeDrop1'),
'ed2' : loadeyedrop(var, 'EyeDrop2'),
'ed3' : loadeyedrop(var, 'EyeDrop3'),
's10' : loadtensec(var),
'h24' : load24hrs(var),
'FE' : loadfuzzyen(var),
'LZ' : loadLZcomp(var),
'LLE' : loadLLE(var),
'Hu' : loadHurst(var)}
return vardct
In [16]:
#set all to dicts
PRdct = loadall('PR')
SPdct = loadall('SpO2')
STdct = loadall('StO2')
FTdct = loadall('FTOE')
measlst = ['ed1', 'ed2', 'ed3', 's10', 'h24', 'FE', 'LZ', 'LLE', 'Hu']
In [17]:
ind24hlst = ['h24', 'FE', 'LZ', 'LLE', 'Hu']
ind300slst = ['ed1', 'ed2', 'ed3']
ind240slst = ['s10']
twind2 = list(np.arange(0, 310, 10))
twind3 = list(np.arange(0, 250, 10))
twind2.insert(0, 'Baseline')
twind3.insert(0, 'Baseline')
In [18]:
def meanplot(dct, meas, var):
dct[meas].mean(axis=0).plot(yerr=dct[meas].std(axis=0), ecolor='r')
if meas in ind24hlst:
plt.ylabel(var)
plt.xlim(-1,9)
plt.xticks(np.arange(0, 9), twind, rotation='vertical')
elif meas in ind300slst:
plt.ylabel(var)
plt.xlim(-1, 32)
plt.xticks(np.arange(0, 32), twind2, rotation='vertical')
elif meas in ind240slst:
plt.ylabel(var)
plt.xlim(-1, 26)
plt.xticks(np.arange(0, 26), twind3, rotation='vertical')
plt.show()
df = pd.DataFrame({var+' '+ meas +' mean':dct[meas].mean(axis=0)
, var+' '+(meas)+' std dev':dct[meas].std(axis=0)})
display(df)
return df
In [19]:
#reload through all ROP babies CHECK
#graph the total of everything, not boxplot. should be easy because everything returned is a dict
#PRdct['ed1'] is a dataframe, just plot CHECK
#overlay CI on boxplot CHECK
#make subplots and share x axis CHECK
#everything shoud be baseline:time epoch and then it should be analyzed that way CHECK
#make analysis for all measures (just more boxes, repeat. easy)
#make duplicate notebook for SNAPP stratification
#Future:
#seaborn.FacetGrid for easier data visualizations
In [20]:
for i in measlst:
meanplot(PRdct, i, 'PR')
In [21]:
def statsanalyze(dct, meas, epoch):
#bl = PRdct['h24']['-3:0']
#a = PRdct['h24']['0:3']
#subtract time epoch by baseline
if meas in ind24hlst:
bl = dct[meas]['-3:0']
else:
bl = dct[meas]['Baseline']
a = dct[meas][epoch]
r1 = a - bl
#make into a dataframe
df = pd.DataFrame({'Baseline':bl, 'Epoch':a, 'Difference':r1})
df = df.dropna()
#print n, mean, std dev, std error, min, and max of dataset
n = len(df)
mean = np.nanmean(r1)
stdev = np.nanstd(r1)
stderr = stats.sem(r1, nan_policy='omit')
rmin = np.amin(r1)
rmax = np.amax(r1)
#confidence intervals
cimin, cimax = stats.t.interval(0.95, 10, loc=mean, scale=stdev)
#paired t-test
tvalue, pvalueT = stats.ttest_rel(df['Baseline'].values, df['Epoch'].values)
#wilcoxon
svalue, pvalueW = stats.ranksums(df['Baseline'].values, df['Epoch'].values)
#plot distribution
# Two subplots, the axes array is 1-d
# http://stackoverflow.com/questions/23969619/plotting-with-seaborn-using-the-matplotlib-object-oriented-interface
f, (ax1, ax2) = plt.subplots(2, sharex=True)
plt.title('Distribution of Difference')
sns.distplot(df['Difference'].values, ax=ax1, kde=True)
sns.boxplot(df['Difference'].values, ax=ax2, width=.4, palette='Set3', linewidth=2) #gray diamonds are outliers
sns.stripplot([cimin, cimax], color='r', marker='d') #red diamonds are 95% CI
ax1.set_ylabel('Relative Frequency')
plt.xlabel('Difference')
plt.show()
if pvalueT < 0.05 or pvalueW < 0.05:
print "******SIGNIFICANT*******"
print tabulate([['N' , n], ['Mean Diff', mean], ['Std Dev', stdev], ['Std Err', stderr],
['Min', rmin], ['Max', rmax], ['95% CI Min', cimin],
['95% CI Max', cimax], ['T value', tvalue], ['p-value t-test', pvalueT],
['S value', svalue], ['p-value Wilcoxon', pvalueW]], headers=['Variable', 'Value'])
return
In [22]:
statsanalyze(STdct, 'h24', '21:24')
In [ ]: