In [1]:
import os

import numpy as np
import pandas as pd
In [2]:
# Names of the summary variables. Each name is used as the prefix of a
# '<name>_summary_tensec.csv' file and as a column of the combined table.
varlst = [
    'PI',
    'PR',
    'SpO2',
    'StO2',
    'FTOE',
]
In [3]:
# Directory that was previously hard-coded inside loadtensecsum.
DEFAULT_SUMMARY_DIR = "/Users/John/Desktop/ROP Python/Summary Data/csv/"


def loadtensecsum(var, data_dir=DEFAULT_SUMMARY_DIR):
    """Load the ten-second summary CSV for one variable.

    Parameters
    ----------
    var : str
        Variable name; the file that is read is ``<var>_summary_tensec.csv``.
    data_dir : str, optional
        Directory containing the summary CSV files. Defaults to the original
        hard-coded location so existing ``loadtensecsum(var)`` calls behave
        exactly as before.

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pd.read_csv`` with default options.
    """
    filename = var + '_summary_tensec.csv'
    # os.path.join copes with data_dir given with or without a trailing separator.
    return pd.read_csv(os.path.join(data_dir, filename))
In [4]:
# Load each variable's summary table once and keep it under the variable name.
vardict = {name: loadtensecsum(name) for name in varlst}
In [5]:
def idrowdict(var, frames=None, start=4, stop=6):
    """Map the short ID of every row in one variable's table to its row position.

    The first column of the table is taken as the ID column. Each value in it
    is sliced with ``[start:stop]`` to obtain the short ID (the values are
    assumed to be strings, since they are sliced).

    Parameters
    ----------
    var : str
        Key of the table to inspect.
    frames : dict, optional
        Mapping ``variable name -> DataFrame``. Defaults to the notebook-level
        ``vardict``.
    start, stop : int, optional
        Slice bounds applied to each full ID string. The defaults (4, 6)
        reproduce the previously hard-coded ``x[4:6]``.

    Returns
    -------
    dict
        ``{short_id: row_position}``. If two rows share a short ID, the later
        row wins.
    """
    if frames is None:
        frames = vardict
    table = frames[var]
    id_column = table.columns[0]
    full_ids = table[id_column].values
    return {full_id[start:stop]: row for row, full_id in enumerate(full_ids)}
In [6]:
from collections import OrderedDict

# Key-row relationship for all variables: kr[variable][short_id] -> row position.
kr = {}
total_idlst = []  # one collection of short IDs per variable, in varlst order
for name in varlst:
    kr[name] = idrowdict(name)
    total_idlst.append(kr[name].keys())

# Union of the IDs across all variables, keeping first-seen order.
# OrderedDict.fromkeys drops duplicates with hashed lookups instead of
# scanning a growing list once per ID.
real_idlst = list(OrderedDict.fromkeys(
    short_id for ids in total_idlst for short_id in ids
))
In [7]:
# Initialise the nested lookup d[short_id][variable]. Every slot starts as
# None, and each ID gets its own independent inner dict.
d = {short_id: dict.fromkeys(varlst) for short_id in real_idlst}
In [8]:
# Line up the keys and values in the dict: for every variable, copy each ID's
# row of measurements into d[short_id][variable].
for name in varlst:
    table = vardict[name]
    # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
    for short_id, row in kr[name].items():
        # Take the row at that position and drop its first entry (the ID column).
        d[short_id][name] = table.iloc[row][1:].values
In [9]:
# Time labels: -10, 0, 10, ..., 240 (26 values).
# dfsfromdict assigns them to the 'Time' column of every per-ID table.
# No 'Baseline' label is inserted here; the values stay numeric.
TIME_START, TIME_STOP, TIME_STEP = -10, 250, 10
timeind = range(TIME_START, TIME_STOP, TIME_STEP)
In [10]:
def dfsfromdict(vardict, ids=None, times=None):
    """Stack the per-ID measurements into one long table.

    Parameters
    ----------
    vardict : dict
        Nested mapping ``vardict[id][variable] -> values``. Each inner dict
        becomes the columns of one per-ID table and must provide the keys
        'PI', 'PR', 'SpO2', 'StO2' and 'FTOE'. A value of ``None`` marks a
        variable with no data for that ID.
    ids : iterable, optional
        IDs to include, in processing order. Defaults to the notebook-level
        ``real_idlst``.
    times : sequence, optional
        Time labels, one per row of each per-ID table. Defaults to the
        notebook-level ``timeind``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Id, Time, PI, PR, SpO2, StO2, FTOE``, sorted by Id and then
        Time. The Time value -10 is relabelled 'Baseline' and missing values
        are NaN. An empty frame with those columns is returned when ``ids``
        is empty.

    Raises
    ------
    ValueError
        Propagated from pandas when ``times`` does not match the length of a
        per-ID table.
    """
    if ids is None:
        ids = real_idlst
    if times is None:
        times = timeind
    dfcols = ['Id', 'Time', 'PI', 'PR', 'SpO2', 'StO2', 'FTOE']

    frames = []
    for i in ids:
        # Swap None placeholders for NaN up front. The old string-based
        # replace('None', ...) never matched real None objects.
        columns = {
            name: (np.nan if values is None else values)
            for name, values in vardict[i].items()
        }
        df = pd.DataFrame.from_dict(columns)
        df['Time'] = times
        df['Id'] = i  # scalar is broadcast to every row
        frames.append(df[dfcols])

    if not frames:
        # pd.concat raises on an empty list; return an empty table instead.
        return pd.DataFrame(columns=dfcols)

    dff = pd.concat(frames, axis=0)
    # DataFrame.sort(columns=...) was removed from pandas; sort_values replaces it.
    dff = dff.sort_values(by=['Id', 'Time'])
    # Relabel after sorting so the sort still sees numeric times. The old code
    # compared against the string '-10', which never matched the integer column.
    dff['Time'] = ['Baseline' if t == -10 else t for t in dff['Time']]
    # Literal 'None' strings are still mapped to NaN (np.NaN no longer exists
    # in NumPy 2).
    dff = dff.replace(to_replace='None', value=np.nan)
    return dff
In [11]:
# Combine the per-ID data held in `d` into one long table (see dfsfromdict).
dff = dfsfromdict(d)
In [12]:
# Use the ID as the row index. This rebinds `dff`, so re-running this cell
# without re-running the previous one will fail: 'Id' is no longer a column.
dff = dff.set_index('Id')
In [13]:
# Write the table to the current working directory. The 'Id' index becomes the
# first CSV column, and NaN cells are written as empty fields.
dff.to_csv('ROP10sec_clean.csv') #auto Nan -> blank
In [ ]:
In [ ]: