Time Epoch

Box and Whisker Plot - Summary Data


In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from pylab import *
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from scipy import stats
import seaborn.apionly as sns
from tabulate import tabulate

%matplotlib inline
#plt.rcParams['figure.figsize'] = (20.0, 10.0) #set figure size

In [2]:
varlst = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE'] #list of variables
twind = ['-3:0','0:3','3:6','6:9','9:12','12:15','15:18','18:21','21:24'] #time window index
ind = list(np.arange(0, 250, 10)) #define index for 240
ind2= list(np.arange(0, 310, 10)) #define index for 5 mins
ind.insert(0, 'Baseline')
ind2.insert(0, 'Baseline')

In [3]:
import ROPtimes as rt

In [4]:
Babylst = ['ROP003', 'ROP005', 'ROP006', 'ROP007', 'ROP008', 'ROP009', 'ROP010',
          'ROP011', 'ROP012', 'ROP013', 'ROP014', 'ROP015', 'ROP017', 'ROP018',
          'ROP019', 'ROP020', 'ROP021', 'ROP022', 'ROP023', 'ROP025']

#update this as you go

In [5]:
def saveplot(filename):
    plt.savefig('Summary Data/plots/'+filename)
    
def savecsv(filename, df):
    df.to_csv('Summary Data/csv/'+filename+'.csv')

In [6]:
#d = {key: value for (key, value) in iterable}

Eye Drops


In [7]:
def loadeyedrop(var, eyedrop, graph=False):
    Babydflst = []
    
    for Baby in Babylst:
        try:
            Babyframe = pd.DataFrame()
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                     Baby+var+'_'+eyedrop+'_mean.csv', usecols=[var+' mean'],
                            index_col=None, header=0)
            colname = Baby + ' ' + var + ' mean'
            df.columns=[colname]
            Babydflst.append(df)
            BabyFrame = pd.concat(Babydflst, axis=1)
            BabyFrame = BabyFrame.transpose()
            BabyFrame.columns = ind2
            if var == 'PI':
                mask = np.greater(BabyFrame, 5)
                BabyFrame[mask] = np.nan
            
        except IOError:
            pass
        
    if graph == True:
        BabyFrame.boxplot(return_type='axes')
        plt.title(var + ' ' +eyedrop + ' Q Ten Seconds Eye Drops')
        plt.show()
        saveplot(var+'_'+eyedrop+'_summary_tensec')
    
    #display(BabyFrame)
    savecsv(var+'_'+eyedrop+'_summary_tensec', BabyFrame)
    return BabyFrame

Every 10 Sec


In [8]:
def loadtensec(var, graph=False):
    
    Babydflst = []
    
    for Baby in Babylst:
        try:
            Babyframe = pd.DataFrame()
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                     Baby+var+'_mean_stddev_10sec.csv', usecols=[var+' mean'],
                            index_col=None, header=0)
            colname = Baby + ' ' + var + ' mean'
            df.columns=[colname]
            Babydflst.append(df)
            BabyFrame = pd.concat(Babydflst, axis=1)
            BabyFrame = BabyFrame.transpose()
            BabyFrame.columns = ind
            if var == 'PI':
                mask = np.greater(BabyFrame, 5)
                BabyFrame[mask] = np.nan
            
        except IOError:
            pass
        
    if graph == True:
        BabyFrame.boxplot(return_type='axes')
        plt.title(var + ' Q Ten Seconds From Start of ROP Exam')
        plt.show()
        saveplot(var+'_summary_tensec')
    
    #display(BabyFrame)
    #savecsv(var+'_summary_tensec', BabyFrame)
    
    return BabyFrame

24 Hrs


In [9]:
def xform_to_twind(dflst):
    df = pd.concat(dflst, axis=1)
    df = df.transpose()
    df.columns = twind
    
    return df
    
def twind_boxplot(df, var, label):
    df.boxplot(return_type='axes')
    plt.title(var + label)
    plt.show()
    
    return

In [10]:
def load24hrs(var, graph=False):
    Babydflst = []
    
    for Baby in Babylst:
        try:
            Babyframe = pd.DataFrame()
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                     Baby+var+'_mean_stddev_24hrs.csv', usecols=[var+' mean'],
                            index_col=None, header=0)
            df.columns=[Baby+' '+var+' mean']
            Babydflst.append(df)
            
            BabyFrame = xform_to_twind(Babydflst)
            
            if var == 'PI':
                mask = np.greater(BabyFrame, 5)
                BabyFrame[mask] = np.nan
            
        except IOError:
            pass
        
    if graph == True:
        twind_boxplot(BabyFrame, var, ' for 24 hours')
        #saveplot(var+'_24hrs_summ')
        
    #display(BabyFrame)
    #savecsv(var+'_24hrs_summ', BabyFrame)

    return BabyFrame

Fuzzy Entropy


In [11]:
def loadfuzzyen(var, graph=False):
    
    Babydflst = []
    for Baby in Babylst:
        try:
            
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                         Baby+var+'_FuzzyEn.csv', usecols=[1])
            df.columns = [Baby + ' ' + var + ' FuzzyEn']
            Babydflst.append(df)
            
            BabyFrame = xform_to_twind(Babydflst)
            
            if var == 'PI':
                mask = np.greater(BabyFrame, 5)
                BabyFrame[mask] = np.nan
            
        except IOError:
            pass
    
    if graph == True:
        twind_boxplot(BabyFrameFin, var, ' Fuzzy Entropy for 24 hours')
        #saveplot(var+'_fuzzyen_summ')
    
    #display(BabyFrame)
    #savecsv(var+'_fuzzyen_summ', BabyFrame)
    
    return BabyFrame

Lempel-Ziv Complexity


In [12]:
def loadLZcomp(var, graph=False):
    
    Babydflst = []
    
    for Baby in Babylst:
        try:
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                     Baby+var+'_LZComp.csv', usecols=[1])
            Babydflst.append(df)
            df.columns=[Baby+' '+var+' LZ Comp']
            
            BabyFrame = xform_to_twind(Babydflst)
          
                
        except IOError:
            pass

    if graph == True:
        twind_boxplot(BabyFrame, var, ' Lempel-Ziv Complexity for 24 hours')
        #saveplot(var+'_LZ_summ')
    
    #display(BabyFrame)
    #savecsv(var+'_LZ_summ', BabyFrame)
    
    
    return BabyFrame

Largest Lyapunov Exponent


In [13]:
def loadLLE(var, graph=False):
    
    Babydflst = []
    
    for Baby in Babylst:
        try:
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                         Baby+var+'_LLE.csv', usecols=[1])
            df.columns=[Baby+' '+var+' LLE']
            Babydflst.append(df)
            
            BabyFrame = xform_to_twind(Babydflst)
            
        except IOError:
            pass

    if graph==True:
        twind_boxplot(BabyFrame, var, ' Largest Lyapunov Exponent for 24 hours')
        #saveplot(var+'_LLE_summ')
    
    #display(BabyFrameFin)
    #savecsv(var+'_LLE_summ', BabyFrame)
    
    return BabyFrame

Hurst


In [14]:
def loadHurst(var, graph=False):
    
    Babydflst = []
    
    for Baby in Babylst:
        try:
            df = pd.read_csv('/Users/John/Desktop/ROP Python/Data/'+Baby+'/csv/'+
                     Baby+var+'_Hurst.csv', usecols=[1])
            df.columns=[Baby+' '+var+' Hurst']
            Babydflst.append(df)
            
            BabyFrame = xform_to_twind(Babydflst)
            
        except IOError:
            pass

    if graph==True:
        twind_boxplot(BabyFrameFin, var, ' Hurst Exponent for 24 hours')
        #saveplot(var+'_Hurst_summ')
    
    #display(BabyFrameFin)
    #savecsv(var+'_Hurst_summ', BabyFrameFin)
    
    return BabyFrame

In [15]:
def loadall(var):
    vardct = {
        'ed1' : loadeyedrop(var, 'EyeDrop1'),
        'ed2' : loadeyedrop(var, 'EyeDrop2'),
        'ed3' : loadeyedrop(var, 'EyeDrop3'),
        's10' : loadtensec(var),
        'h24' : load24hrs(var),
        'FE' : loadfuzzyen(var),
        'LZ' : loadLZcomp(var),
        'LLE' : loadLLE(var),
        'Hu' : loadHurst(var)}
    return vardct

In [16]:
#set all to dicts
PRdct = loadall('PR')
SPdct = loadall('SpO2')
STdct = loadall('StO2')
FTdct = loadall('FTOE')
measlst = ['ed1', 'ed2', 'ed3', 's10', 'h24', 'FE', 'LZ', 'LLE', 'Hu']

In [17]:
ind24hlst = ['h24', 'FE', 'LZ', 'LLE', 'Hu']
ind300slst = ['ed1', 'ed2', 'ed3']
ind240slst = ['s10']

twind2 = list(np.arange(0, 310, 10))
twind3 = list(np.arange(0, 250, 10))
twind2.insert(0, 'Baseline')
twind3.insert(0, 'Baseline')

In [18]:
def meanplot(dct, meas, var):
    
    dct[meas].mean(axis=0).plot(yerr=dct[meas].std(axis=0), ecolor='r')
    
    
    if meas in ind24hlst:
        plt.ylabel(var)
        plt.xlim(-1,9)
        plt.xticks(np.arange(0, 9), twind, rotation='vertical')
       
    elif meas in ind300slst:
        
        plt.ylabel(var)
        plt.xlim(-1, 32)
        plt.xticks(np.arange(0, 32), twind2, rotation='vertical')
        

    elif meas in ind240slst:
    
        plt.ylabel(var)
        plt.xlim(-1, 26)
        plt.xticks(np.arange(0, 26), twind3, rotation='vertical')
    
    plt.show()
    
    df = pd.DataFrame({var+' '+ meas +' mean':dct[meas].mean(axis=0)
                       , var+' '+(meas)+' std dev':dct[meas].std(axis=0)})
    
    display(df)
    
    return df

Pulse Rate


In [19]:
#reload through all ROP babies CHECK
    
#graph the total of everything, not boxplot. should be easy because everything returned is a dict
    #PRdct['ed1'] is a dataframe, just plot CHECK
    #overlay CI on boxplot CHECK

#make subplots and share x axis CHECK
    
#everything shoud be baseline:time epoch and then it should be analyzed that way CHECK

#make analysis for all measures (just more boxes, repeat. easy)

#make duplicate notebook for SNAPP stratification

#Future:
    #seaborn.FacetGrid for easier data visualizations

In [20]:
for i in measlst:
    meanplot(PRdct, i, 'PR')


PR ed1 mean PR ed1 std dev
Baseline 156.403307 13.034108
0 160.470588 18.153783
10 158.921569 16.793385
20 158.558824 23.248129
30 161.411111 19.489198
40 161.133333 28.391900
50 164.311111 21.526826
60 169.300000 16.085486
70 166.260417 15.565985
80 166.800926 14.864231
90 164.140741 15.005435
100 164.154902 16.060917
110 163.794118 19.322961
120 162.028704 19.173429
130 162.754902 19.968562
140 164.249020 17.631156
150 166.166667 17.233473
160 170.143750 15.294428
170 168.245098 15.729607
180 168.205882 15.784962
190 168.721569 17.680474
200 167.578431 20.453124
210 166.912500 19.158237
220 165.380392 18.861156
230 167.027778 19.852252
240 165.953704 19.693551
250 164.156863 20.146294
260 166.787037 19.050570
270 169.127451 19.989627
280 165.170833 23.322686
290 165.625000 22.668096
300 163.813725 20.193068
PR ed2 mean PR ed2 std dev
Baseline 156.403307 13.034108
0 160.470588 18.153783
10 158.921569 16.793385
20 158.558824 23.248129
30 161.411111 19.489198
40 161.133333 28.391900
50 164.311111 21.526826
60 169.300000 16.085486
70 166.260417 15.565985
80 166.800926 14.864231
90 164.140741 15.005435
100 164.154902 16.060917
110 163.794118 19.322961
120 162.028704 19.173429
130 162.754902 19.968562
140 164.249020 17.631156
150 166.166667 17.233473
160 170.143750 15.294428
170 168.245098 15.729607
180 168.205882 15.784962
190 168.721569 17.680474
200 167.578431 20.453124
210 166.912500 19.158237
220 165.380392 18.861156
230 167.027778 19.852252
240 165.953704 19.693551
250 164.156863 20.146294
260 166.787037 19.050570
270 169.127451 19.989627
280 165.170833 23.322686
290 165.625000 22.668096
300 163.813725 20.193068
PR ed3 mean PR ed3 std dev
Baseline 156.403307 13.034108
0 160.470588 18.153783
10 158.921569 16.793385
20 158.558824 23.248129
30 161.411111 19.489198
40 161.133333 28.391900
50 164.311111 21.526826
60 169.300000 16.085486
70 166.260417 15.565985
80 166.800926 14.864231
90 164.140741 15.005435
100 164.154902 16.060917
110 163.794118 19.322961
120 162.028704 19.173429
130 162.754902 19.968562
140 164.249020 17.631156
150 166.166667 17.233473
160 170.143750 15.294428
170 168.245098 15.729607
180 168.205882 15.784962
190 168.721569 17.680474
200 167.578431 20.453124
210 166.912500 19.158237
220 165.380392 18.861156
230 167.027778 19.852252
240 165.953704 19.693551
250 164.156863 20.146294
260 166.787037 19.050570
270 169.127451 19.989627
280 165.170833 23.322686
290 165.625000 22.668096
300 163.813725 20.193068
PR s10 mean PR s10 std dev
Baseline 156.792797 13.962259
0 160.470588 18.153783
10 158.921569 16.793385
20 158.558824 23.248129
30 161.411111 19.489198
40 161.133333 28.391900
50 164.311111 21.526826
60 169.300000 16.085486
70 166.260417 15.565985
80 166.800926 14.864231
90 164.140741 15.005435
100 164.154902 16.060917
110 163.794118 19.322961
120 162.028704 19.173429
130 162.754902 19.968562
140 164.249020 17.631156
150 166.166667 17.233473
160 170.143750 15.294428
170 168.245098 15.729607
180 168.205882 15.784962
190 168.721569 17.680474
200 167.578431 20.453124
210 166.912500 19.158237
220 165.380392 18.861156
230 167.027778 19.852252
240 165.953704 19.693551
PR h24 mean PR h24 std dev
-3:0 157.002081 13.525930
0:3 155.056418 12.074023
3:6 158.025478 12.335327
6:9 157.582624 11.567579
9:12 155.437948 10.518966
12:15 155.273937 9.519762
15:18 155.340465 10.710047
18:21 157.773109 11.136841
21:24 156.401643 12.994702
PR FE mean PR FE std dev
-3:0 0.072334 0.028120
0:3 0.058623 0.021761
3:6 0.077279 0.027975
6:9 0.080878 0.028552
9:12 0.070688 0.023554
12:15 0.070895 0.020393
15:18 0.081702 0.022348
18:21 0.071670 0.027992
21:24 0.069049 0.022699
PR LZ mean PR LZ std dev
-3:0 0.396370 0.085960
0:3 0.379617 0.093431
3:6 0.393016 0.086301
6:9 0.401288 0.091726
9:12 0.391285 0.069592
12:15 0.395963 0.079173
15:18 0.414422 0.062874
18:21 0.411104 0.076410
21:24 0.399535 0.093434
PR LLE mean PR LLE std dev
-3:0 5.119796 0.087534
0:3 5.133995 0.090984
3:6 5.117864 0.078135
6:9 5.103193 0.107147
9:12 5.102729 0.107191
12:15 5.086854 0.082153
15:18 5.072230 0.075692
18:21 5.102561 0.081817
21:24 5.118423 0.061484
PR Hu mean PR Hu std dev
-3:0 0.777083 0.034417
0:3 0.796914 0.036262
3:6 0.783992 0.033958
6:9 0.790186 0.028035
9:12 0.795889 0.033056
12:15 0.783888 0.035241
15:18 0.791185 0.023586
18:21 0.788259 0.027138
21:24 0.781132 0.038035

In [21]:
def statsanalyze(dct, meas, epoch):
    
    #bl = PRdct['h24']['-3:0'] 
    #a = PRdct['h24']['0:3']
    
    #subtract time epoch by baseline
    if meas in ind24hlst:
        bl = dct[meas]['-3:0'] 

    else:
        bl = dct[meas]['Baseline']
    
    a = dct[meas][epoch]
    
    r1 = a - bl
    
    #make into a dataframe
    df = pd.DataFrame({'Baseline':bl, 'Epoch':a, 'Difference':r1})
    df = df.dropna()

    #print n, mean, std dev, std error, min, and max of dataset
    
    n = len(df)
    mean = np.nanmean(r1)
    stdev = np.nanstd(r1)
    stderr = stats.sem(r1, nan_policy='omit')
    rmin = np.amin(r1)
    rmax = np.amax(r1)

    #confidence intervals
    cimin, cimax = stats.t.interval(0.95, 10, loc=mean, scale=stdev) 

    #paired t-test
    tvalue, pvalueT = stats.ttest_rel(df['Baseline'].values, df['Epoch'].values)

    #wilcoxon
    svalue, pvalueW = stats.ranksums(df['Baseline'].values, df['Epoch'].values)

    #plot distribution
    
    # Two subplots, the axes array is 1-d
    # http://stackoverflow.com/questions/23969619/plotting-with-seaborn-using-the-matplotlib-object-oriented-interface
    f, (ax1, ax2) = plt.subplots(2, sharex=True)
    
    plt.title('Distribution of Difference')
    sns.distplot(df['Difference'].values, ax=ax1, kde=True)
    
    sns.boxplot(df['Difference'].values, ax=ax2, width=.4, palette='Set3', linewidth=2) #gray diamonds are outliers
    sns.stripplot([cimin, cimax], color='r', marker='d') #red diamonds are 95% CI
    
    ax1.set_ylabel('Relative Frequency')
    
    plt.xlabel('Difference')
    
    plt.show()
    
    if pvalueT < 0.05 or pvalueW < 0.05:
        print "******SIGNIFICANT*******"
    
    print tabulate([['N' , n], ['Mean Diff', mean], ['Std Dev', stdev], ['Std Err', stderr],
                ['Min', rmin], ['Max', rmax], ['95% CI Min', cimin],
                ['95% CI Max', cimax], ['T value', tvalue], ['p-value t-test', pvalueT],
                ['S value', svalue], ['p-value Wilcoxon', pvalueW]], headers=['Variable', 'Value'])
    
    return

In [22]:
statsanalyze(STdct, 'h24', '21:24')


Variable               Value
----------------  ----------
N                  15
Mean Diff           0.733733
Std Dev             5.60587
Std Err             1.49823
Min                -9.13318
Max                14.3458
95% CI Min        -11.7569
95% CI Max         13.2244
T value            -0.489733
p-value t-test      0.631907
S value            -0.228129
p-value Wilcoxon    0.819546

In [ ]: