In [1]:
import numpy as np
import pandas as pd
varlst = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE']
crossvarlst = ['SpO2xStO2', 'PIxStO2', 'PIxPR', 'PRxFTOE',
               'PRxStO2', 'PIxSpO2', 'PIxFTOE', 'PRxSpO2']

In [2]:
#load files

In [3]:
def loadmeas(var, meas):
    filename = var + '_' + meas + '_summ.csv'
    df = pd.read_csv("/Users/John/Desktop/ROP Python/Summary Data/csv/"+filename)
    return df

def loadcrossmeas(var1, var2, meas):
    filename = meas+'_'+var1+'x'+var2+'_summ.csv'
    df = pd.read_csv("/Users/John/Desktop/ROP Python/Summary Data/csv/"+filename)
    return df

In [4]:
desatslst = ['Mild', 'Mod', 'Sev'] #keys for dict
desatsdict = {} #keys are Mild, Mod, DeSat

for i in desatslst:
    a = loadmeas(i, 'desats')
    desatsdict[i] = a

In [5]:
def dictfromload(meas):

    vardict = {}

    for i in varlst:
        a = loadmeas(i, meas)
        vardict[i] = a
    
    return vardict

In [6]:
def dictfromloadcross(meas):
    
    dictname = {}
    
    for i in varlst[1:]:
        dfloaded = loadcrossmeas('PI', i, meas)
        dictname['PIx'+i] = dfloaded
    
    for i in varlst[2:]:
        dfloaded2 = loadcrossmeas('PR', i, meas)
        dictname['PRx'+i] = dfloaded2
    
    dfloaded3 = loadcrossmeas('SpO2', 'StO2', meas)
    dictname['SpO2x'+'StO2'] = dfloaded3
    
    return dictname

In [7]:
summ24hrsdict = dictfromload('24hrs') #keys are vars
fuzzyendict = dictfromload('fuzzyen')
LZdict = dictfromload('LZ')
LLEdict = dictfromload('LLE')
Hurstdict = dictfromload('Hurst')

In [8]:
MIdict = dictfromloadcross('MI') 
xFEdict = dictfromloadcross('xFE')

In [9]:
dctlst = [summ24hrsdict, fuzzyendict, LZdict, LLEdict, Hurstdict]
desatsdictlst = [desatsdict]
xdctlst = [MIdict, xFEdict]

In [10]:
def idrowdict(var, vardict):
    idcol = vardict[var].columns[0] #get id column
    aa = vardict[var][idcol].values #get full string
    idlst = map(lambda x: x[4:6], aa) #get substring. real IDs
    
    kr = dict(zip(idlst, range(len(vardict[var])))) #dict key-row relationship for the other dict

    return kr

In [11]:
def getrealID(varlst, dctlst):

    from collections import OrderedDict
    # key-row dict relationshipf for all variables
    kr = {}

    total_idlst = []

    real_idlst = []

    for x in dctlst:
        for i in varlst:
            kr[i] = idrowdict(i, x)
            total_idlst.append(kr[i].keys())

    #get rid of duplicates
    for i in [item for sublist in total_idlst for item in sublist]:
        if i not in real_idlst:
            real_idlst.append(i)
            
    return real_idlst, kr

In [12]:
realidlst, kr = getrealID(varlst, dctlst)
desatidlst, krdesat = getrealID(desatslst, desatsdictlst)
crossidlst, krcross = getrealID(crossvarlst, xdctlst)

In [13]:
#intitialize dict

columnlst = ['PI', 'PR', 'SpO2', 'StO2', 'FTOE', 
            'Mild_DeSats', 'Mod_DeSats', 'Sev_DeSats',
            'PI_FuzzyEn', 'PR_FuzzyEn', 'SpO2_FuzzyEn', 'StO2_FuzzyEn', 'FTOE_FuzzyEn',
            'PI_LZ', 'PR_LZ', 'SpO2_LZ', 'StO2_LZ', 'FTOE_LZ',
            'PI_LLE', 'PR_LLE', 'SpO2_LLE', 'StO2_LLE', 'FTOE_LLE',
            'PI_Hurst', 'PR_Hurst', 'SpO2_Hurst', 'StO2_Hurst', 'FTOE_Hurst'
            'MI_PIxPR', 'MI_PIxSpO2', 'MI_PIxStO2', 'MI_PIxFTOE',
            'MI_PRxSpO2', 'MI_PRxStO2', 'MI_PRxFTOE', 'MI_SpO2xStO2',
            'xFE_PIxPR', 'xFE_PIxSpO2', 'xFE_PIxStO2', 'xFE_PIxFTOE',
            'xFE_PRxSpO2', 'xFE_PRxStO2', 'xFE_PRxFTOE', 'xFE_SpO2xStO2']

d = dict.fromkeys(realidlst) #keys. use realidlst because it has the most
for i in d.keys(): #values of variables
    d[i] = dict.fromkeys(columnlst)

In [14]:
#line up the keys and values in the dict, and return the dict

for i in varlst:
    for k, v in kr[i].iteritems():
        d[k][i] = summ24hrsdict[i].iloc[v][1:].values

for i in desatslst:
    for k, v in krdesat[i].iteritems():
        d[k][i+'_DeSats'] = desatsdict[i].iloc[v][1:].values

for i in varlst:
    for k, v in kr[i].iteritems():
        d[k][i+'_FuzzyEn'] = fuzzyendict[i].iloc[v][1:].values
        
for i in varlst:
    for k, v in kr[i].iteritems():
        d[k][i+'_LZ'] = LZdict[i].iloc[v][1:].values

for i in varlst:
    for k, v in kr[i].iteritems():
        d[k][i+'_LLE'] = LLEdict[i].iloc[v][1:].values

for i in varlst:
    for k, v in kr[i].iteritems():
        d[k][i+'_Hurst'] = Hurstdict[i].iloc[v][1:].values

for i in crossvarlst:
    for k, v in krcross[i].iteritems():
        d[k]['MI_'+i] = MIdict[i].iloc[v][1:].values

for i in crossvarlst:
    for k, v in krcross[i].iteritems():
        d[k]['xFE_'+i] = xFEdict[i].iloc[v][1:].values

In [15]:
# set time range
timeind = range(0, 27, 3)

In [16]:
def dfsfromdict(dct):
    
    frames = []
    
    for i in realidlst:
        df = pd.DataFrame.from_dict(dct[i])
        df['Time'] = timeind

        #get ID to fill length of df
        IDcolumn = []

        for x in np.arange(len(df)):
            IDcolumn.append(i)

        df['Id'] = IDcolumn 

        #rearrange columns
        dfcols = ['Id', 'Time', 'PI', 'PR', 'SpO2', 'StO2', 'FTOE', 
            'Mild_DeSats', 'Mod_DeSats', 'Sev_DeSats',
            'PI_FuzzyEn', 'PR_FuzzyEn', 'SpO2_FuzzyEn', 'StO2_FuzzyEn', 'FTOE_FuzzyEn',
            'PI_LZ', 'PR_LZ', 'SpO2_LZ', 'StO2_LZ', 'FTOE_LZ',
            'PI_LLE', 'PR_LLE', 'SpO2_LLE', 'StO2_LLE', 'FTOE_LLE',
            'PI_Hurst', 'PR_Hurst', 'SpO2_Hurst', 'StO2_Hurst', 'FTOE_Hurst'
            'MI_PIxPR', 'MI_PIxSpO2', 'MI_PIxStO2', 'MI_PIxFTOE',
            'MI_PRxSpO2', 'MI_PRxStO2', 'MI_PRxFTOE', 'MI_SpO2xStO2',
            'xFE_PIxPR', 'xFE_PIxSpO2', 'xFE_PIxStO2', 'xFE_PIxFTOE',
            'xFE_PRxSpO2', 'xFE_PRxStO2', 'xFE_PRxFTOE', 'xFE_SpO2xStO2']
        
        df = df[dfcols]
    
        frames.append(df)
        
        dff = pd.concat(frames, axis=0)
        dff = dff.sort(columns=['Id', 'Time'])
        dff = dff.replace(to_replace='None', value=np.NaN)
    return dff

In [17]:
dff = dfsfromdict(d)


/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/IPython/kernel/__main__.py:34: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [18]:
dff = dff.set_index('Id')

In [19]:
dff.to_csv('ROP24hrs_clean.csv') #auto Nan -> blank