In [1]:
import os

import numpy as np
import pandas as pd
# Variable names; each one is used to build a per-variable summary file name.
varlst = [
    'PI',
    'PR',
    'SpO2',
    'StO2',
    'FTOE',
]

# Variable pairs, written "<first>x<second>"; these are the keys of the
# cross-variable tables.
crossvarlst = [
    'SpO2xStO2', 'PIxStO2', 'PIxPR', 'PRxFTOE',
    'PRxStO2', 'PIxSpO2', 'PIxFTOE', 'PRxSpO2',
]
In [2]:
# Load the summary CSV files into DataFrames
In [3]:
def loadmeas(var, meas, data_dir="/Users/John/Desktop/ROP Python/Summary Data/csv/"):
    """Read the summary CSV for one variable and one measure.

    The file name is built as ``<var>_<meas>_summ.csv`` and is looked up
    inside ``data_dir``.

    Parameters
    ----------
    var : str
        Variable name used as the file-name prefix (e.g. ``'PI'``).
    meas : str
        Measure name used as the middle part of the file name
        (e.g. ``'24hrs'``).
    data_dir : str, optional
        Directory that holds the CSV files. Defaults to the absolute path
        the notebook was originally written against, so existing two-argument
        calls behave as before; pass another directory to run elsewhere.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as returned by ``pandas.read_csv``.
    """
    filename = var + '_' + meas + '_summ.csv'
    # os.path.join tolerates data_dir with or without a trailing separator.
    return pd.read_csv(os.path.join(data_dir, filename))
def loadcrossmeas(var1, var2, meas, data_dir="/Users/John/Desktop/ROP Python/Summary Data/csv/"):
    """Read the summary CSV for one cross-variable measure.

    The file name is built as ``<meas>_<var1>x<var2>_summ.csv`` and is looked
    up inside ``data_dir``.

    Parameters
    ----------
    var1, var2 : str
        The two variable names, joined with ``'x'`` in the file name.
    meas : str
        Measure name used as the file-name prefix (e.g. ``'MI'``).
    data_dir : str, optional
        Directory that holds the CSV files. Defaults to the absolute path
        the notebook was originally written against, so existing
        three-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as returned by ``pandas.read_csv``.
    """
    filename = meas + '_' + var1 + 'x' + var2 + '_summ.csv'
    # os.path.join tolerates data_dir with or without a trailing separator.
    return pd.read_csv(os.path.join(data_dir, filename))
In [4]:
# Names of the three 'desats' tables; they double as the dict keys below.
desatslst = ['Mild', 'Mod', 'Sev']

# One DataFrame per name, keyed 'Mild' / 'Mod' / 'Sev'.
desatsdict = {level: loadmeas(level, 'desats') for level in desatslst}
In [5]:
def dictfromload(meas):
    """Load the ``meas`` summary table for every variable in ``varlst``.

    Returns a dict mapping each variable name to the DataFrame produced by
    ``loadmeas(name, meas)``.
    """
    return {name: loadmeas(name, meas) for name in varlst}
In [6]:
def dictfromloadcross(meas):
    """Load the ``meas`` cross-variable tables.

    The pairs loaded are 'PI' with every entry of ``varlst[1:]``, 'PR' with
    every entry of ``varlst[2:]``, and finally 'SpO2' with 'StO2'.

    Returns a dict keyed ``'<first>x<second>'`` whose values are the
    DataFrames produced by ``loadcrossmeas(first, second, meas)``.
    """
    # Build the pair list first, in the same order the tables are loaded.
    pairs = [('PI', second) for second in varlst[1:]]
    pairs.extend(('PR', second) for second in varlst[2:])
    pairs.append(('SpO2', 'StO2'))

    crossdict = {}
    for first, second in pairs:
        crossdict[first + 'x' + second] = loadcrossmeas(first, second, meas)
    return crossdict
In [7]:
# One {variable name: DataFrame} dict per single-variable measure,
# loaded in the order the measure names are listed.
(summ24hrsdict,
 fuzzyendict,
 LZdict,
 LLEdict,
 Hurstdict) = (dictfromload(measure)
               for measure in ('24hrs', 'fuzzyen', 'LZ', 'LLE', 'Hurst'))
In [8]:
# One {'<first>x<second>': DataFrame} dict per cross-variable measure.
MIdict, xFEdict = (dictfromloadcross(measure) for measure in ('MI', 'xFE'))
In [9]:
# Group the loaded table dicts by the kind of key they use, so each group
# can be walked together with its matching name list.
# Tables keyed by the names in varlst:
dctlst = [summ24hrsdict, fuzzyendict, LZdict, LLEdict, Hurstdict]
# Tables keyed by the names in desatslst:
desatsdictlst = [desatsdict]
# Tables keyed by '<first>x<second>' pair names:
xdctlst = [MIdict, xFEdict]
In [10]:
def idrowdict(var, vardict):
    """Map each ID found in ``vardict[var]`` to its row position.

    The ID is taken as characters 4-5 (slice ``[4:6]``) of every value in the
    table's first column. If two rows yield the same ID, the later row wins.

    Returns a dict ``{id_substring: row_number}``.
    """
    table = vardict[var]
    first_column = table.columns[0]
    labels = table[first_column].values
    return {label[4:6]: row for row, label in enumerate(labels)}
In [11]:
def getrealID(varlst, dctlst):
    """Collect the distinct IDs and an ID-to-row map for a group of tables.

    Parameters
    ----------
    varlst : list of str
        Names to look up in every dict of ``dctlst``.
    dctlst : list of dict
        Each dict maps a name from ``varlst`` to a DataFrame.

    Returns
    -------
    real_idlst : list
        Every ID seen in any table, without duplicates, in order of first
        appearance (dicts in ``dctlst`` order, names in ``varlst`` order).
    kr : dict
        ``{name: {id: row_number}}``. NOTE: each name's entry is overwritten
        on every pass over ``dctlst``, so on return it reflects only the
        LAST dict in ``dctlst``.
    """
    kr = {}
    real_idlst = []
    seen = set()  # O(1) membership test; real_idlst keeps the order
    for tables in dctlst:
        for name in varlst:
            kr[name] = idrowdict(name, tables)
            for subject_id in kr[name].keys():
                if subject_id not in seen:
                    seen.add(subject_id)
                    real_idlst.append(subject_id)
    return real_idlst, kr
In [12]:
# For each group of tables: the distinct IDs found, plus a
# {name: {id: row number}} lookup as returned by getrealID.
realidlst, kr = getrealID(varlst, dctlst)
desatidlst, krdesat = getrealID(desatslst, desatsdictlst)
crossidlst, krcross = getrealID(crossvarlst, xdctlst)
In [13]:
# Initialize the nested dict: d[id][column name] -> None
columnlst = [
    'PI', 'PR', 'SpO2', 'StO2', 'FTOE',
    'Mild_DeSats', 'Mod_DeSats', 'Sev_DeSats',
    'PI_FuzzyEn', 'PR_FuzzyEn', 'SpO2_FuzzyEn', 'StO2_FuzzyEn', 'FTOE_FuzzyEn',
    'PI_LZ', 'PR_LZ', 'SpO2_LZ', 'StO2_LZ', 'FTOE_LZ',
    'PI_LLE', 'PR_LLE', 'SpO2_LLE', 'StO2_LLE', 'FTOE_LLE',
    'PI_Hurst', 'PR_Hurst', 'SpO2_Hurst', 'StO2_Hurst',
    # NOTE(review): there is no comma between the next two literals, so
    # Python concatenates them into the single entry 'FTOE_HurstMI_PIxPR'.
    # Kept as-is so behaviour is unchanged; adding the comma changes the key
    # set and must be coordinated with every consumer of these keys.
    'FTOE_Hurst' 'MI_PIxPR',
    'MI_PIxSpO2', 'MI_PIxStO2', 'MI_PIxFTOE',
    'MI_PRxSpO2', 'MI_PRxStO2', 'MI_PRxFTOE', 'MI_SpO2xStO2',
    'xFE_PIxPR', 'xFE_PIxSpO2', 'xFE_PIxStO2', 'xFE_PIxFTOE',
    'xFE_PRxSpO2', 'xFE_PRxStO2', 'xFE_PRxFTOE', 'xFE_SpO2xStO2',
]

# Outer keys come from realidlst; every inner dict starts with all column
# names mapped to None.
d = {subject_id: dict.fromkeys(columnlst) for subject_id in realidlst}
In [14]:
# Copy every table row into d[id][column name].
#
# Each plan entry is (tables dict, names to look up in it, column-name
# template). The row for an ID is located with that table's OWN
# ID-to-row map (idrowdict), so tables whose rows are ordered differently,
# or that contain different subsets of IDs, are still aligned correctly.
# An ID that is not already a key of d raises KeyError.
_fill_plan = [
    (summ24hrsdict, varlst, '{0}'),
    (desatsdict, desatslst, '{0}_DeSats'),
    (fuzzyendict, varlst, '{0}_FuzzyEn'),
    (LZdict, varlst, '{0}_LZ'),
    (LLEdict, varlst, '{0}_LLE'),
    (Hurstdict, varlst, '{0}_Hurst'),
    (MIdict, crossvarlst, 'MI_{0}'),
    (xFEdict, crossvarlst, 'xFE_{0}'),
]
for _tables, _names, _template in _fill_plan:
    for _name in _names:
        _column = _template.format(_name)
        _table = _tables[_name]
        # .items() works on both Python 2 and Python 3 dicts.
        for _subject_id, _row in idrowdict(_name, _tables).items():
            # Drop the first (ID) cell of the row; keep the remaining values.
            d[_subject_id][_column] = _table.iloc[_row][1:].values
In [15]:
# Values written to the 'Time' column of every per-ID frame:
# 0, 3, 6, ..., 24 (nine entries). Presumably hours — TODO confirm units.
timeind = range(0, 27, 3)
In [16]:
def dfsfromdict(dct, idlst=None, times=None):
    """Stack the per-ID dicts of ``dct`` into one long DataFrame.

    Parameters
    ----------
    dct : dict
        ``{id: {column name: values}}``; each inner dict is turned into one
        DataFrame with ``pandas.DataFrame.from_dict``.
    idlst : iterable, optional
        IDs to include, looked up as keys of ``dct``. Defaults to the
        notebook-level ``realidlst``.
    times : iterable, optional
        Values for the 'Time' column of every per-ID frame; its length must
        equal the number of rows per ID. Defaults to the notebook-level
        ``timeind``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Id', 'Time', then every measure column in a fixed order,
        sorted by 'Id' then 'Time'. A measure missing for an ID is NaN, and
        the string 'None' is replaced by NaN.
    """
    if idlst is None:
        idlst = realidlst
    if times is None:
        times = timeind

    # Output column order. Built once, not per ID. 'FTOE_Hurst' and
    # 'MI_PIxPR' are separate entries (a missing comma previously fused them).
    dfcols = ['Id', 'Time', 'PI', 'PR', 'SpO2', 'StO2', 'FTOE',
              'Mild_DeSats', 'Mod_DeSats', 'Sev_DeSats',
              'PI_FuzzyEn', 'PR_FuzzyEn', 'SpO2_FuzzyEn', 'StO2_FuzzyEn', 'FTOE_FuzzyEn',
              'PI_LZ', 'PR_LZ', 'SpO2_LZ', 'StO2_LZ', 'FTOE_LZ',
              'PI_LLE', 'PR_LLE', 'SpO2_LLE', 'StO2_LLE', 'FTOE_LLE',
              'PI_Hurst', 'PR_Hurst', 'SpO2_Hurst', 'StO2_Hurst', 'FTOE_Hurst',
              'MI_PIxPR', 'MI_PIxSpO2', 'MI_PIxStO2', 'MI_PIxFTOE',
              'MI_PRxSpO2', 'MI_PRxStO2', 'MI_PRxFTOE', 'MI_SpO2xStO2',
              'xFE_PIxPR', 'xFE_PIxSpO2', 'xFE_PIxStO2', 'xFE_PIxFTOE',
              'xFE_PRxSpO2', 'xFE_PRxStO2', 'xFE_PRxFTOE', 'xFE_SpO2xStO2']

    frames = []
    for subject_id in idlst:
        df = pd.DataFrame.from_dict(dct[subject_id])
        df['Time'] = list(times)
        df['Id'] = subject_id  # scalar is broadcast to every row
        # reindex selects and orders the columns; a column absent for this ID
        # becomes NaN instead of raising, and unexpected columns are dropped.
        frames.append(df.reindex(columns=dfcols))

    dff = pd.concat(frames, axis=0)
    # DataFrame.sort(columns=...) no longer exists; sort_values replaces it.
    dff = dff.sort_values(by=['Id', 'Time'])
    dff = dff.replace(to_replace='None', value=np.nan)
    return dff
In [17]:
# Build the combined long-format table from the nested dict d.
dff = dfsfromdict(d)
In [18]:
# Make 'Id' the row index. NOTE: this rebinds dff, so re-running the cell
# on its own raises KeyError because 'Id' is no longer a column.
dff = dff.set_index('Id')
In [19]:
# Write the table to the current working directory; to_csv writes NaN as an
# empty field by default.
dff.to_csv('ROP24hrs_clean.csv') #auto Nan -> blank