In [1]:
import numpy as np
import pandas as pd

In [2]:
# Names of the summarised variables. Each name selects one
# '<name>_summary_tensec.csv' input file and ends up as one column
# of the combined output table.
varlst = [
    'PI',
    'PR',
    'SpO2',
    'StO2',
    'FTOE',
]

In [3]:
def loadtensecsum(var, data_dir="/Users/John/Desktop/ROP Python/Summary Data/csv/"):
    """Load the ten-second summary CSV for one variable.

    Parameters
    ----------
    var : str
        Variable name. The file that gets read is ``<var>_summary_tensec.csv``.
    data_dir : str, optional
        Directory that contains the summary CSV files. The default is the
        location this notebook was originally written against; pass a
        different directory to run the notebook on another machine.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as ``pd.read_csv`` returns it.
    """
    # Imported locally because the notebook's import cell does not provide os.
    import os

    filename = var + '_summary_tensec.csv'
    return pd.read_csv(os.path.join(data_dir, filename))

In [4]:
# Read every variable's summary table once and keep the resulting
# dataframes in a dict keyed by variable name.
vardict = {name: loadtensecsum(name) for name in varlst}

In [5]:
def idrowdict(var):
    """Map each row's short ID to its row position for one variable.

    Looks up the dataframe stored under ``vardict[var]``, takes its first
    column as the ID column, and uses the characters at positions 4 and 5
    (slice ``[4:6]``) of every entry as the dictionary key. The value is the
    zero-based position of that row in the dataframe.

    If two rows yield the same short ID, the later row's position wins.
    """
    frame = vardict[var]
    id_column = frame.columns[0]  # first column holds the full ID strings

    # Cut the short ID out of every full ID string.
    short_ids = [full_id[4:6] for full_id in frame[id_column].values]

    # short ID -> row position; used to index into the dataframe later.
    return {short_id: row for row, short_id in enumerate(short_ids)}

In [6]:
from collections import OrderedDict

# Key-row relationship for all variables:
# kr[variable] maps each short ID to its row position in that variable's table.
kr = {}

# One list of short IDs per variable, in varlst order.
total_idlst = []

for i in varlst:
    kr[i] = idrowdict(i)
    # list(...) takes a snapshot of the keys, so this behaves the same on
    # Python 2 (keys() is a list) and Python 3 (keys() is a live view).
    total_idlst.append(list(kr[i]))

# Every ID seen in any variable, duplicates removed, first-seen order kept.
# OrderedDict.fromkeys does this in one pass instead of re-scanning the
# growing result list for every candidate ID.
real_idlst = list(OrderedDict.fromkeys(
    one_id for id_group in total_idlst for one_id in id_group))

In [7]:
# Initialize the nested result dict: one entry per ID, each holding its own
# {variable: None} placeholder dict with a slot for every name in varlst.
d = {key: dict.fromkeys(varlst) for key in real_idlst}

In [8]:
# Line up the keys and values in the dict: for every variable, copy each ID's
# row of values (everything after the first column, which holds the ID string)
# into d[ID][variable]. IDs that a variable does not contain keep their None
# placeholder.
for i in varlst:
    # .items() works on both Python 2 and Python 3; .iteritems() exists only
    # on Python 2 and raises AttributeError on Python 3.
    for k, v in kr[i].items():
        d[k][i] = vardict[i].iloc[v][1:].values

In [9]:
# Set the time range: -10, 0, 10, ..., 240 (26 points).
# Materialised as a list so the value is the same kind of object on Python 2
# and Python 3; on Python 3 a bare range() is lazy and has no list methods
# such as insert().
timeind = list(range(-10, 250, 10))
#timeind.insert(0, 'Baseline')

In [10]:
def dfsfromdict(vardict, ids=None, times=None):
    """Flatten the nested ``{id: {variable: values}}`` dict into one long table.

    Parameters
    ----------
    vardict : dict
        Maps each ID to a dict of variable name -> sequence of values (or
        None where a variable has no data for that ID). Note that inside this
        function the name shadows the module-level ``vardict`` of dataframes.
    ids : iterable, optional
        IDs to include. Defaults to the module-level ``real_idlst``.
    times : sequence, optional
        Values written to the 'Time' column of every per-ID frame; needs one
        entry per row. Defaults to the module-level ``timeind``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Id', 'Time', 'PI', 'PR', 'SpO2', 'StO2', 'FTOE', sorted by
        'Id' and then 'Time'. When there are no IDs an empty frame with these
        columns is returned.
    """
    dfcols = ['Id', 'Time', 'PI', 'PR', 'SpO2', 'StO2', 'FTOE']

    if ids is None:
        ids = real_idlst
    if times is None:
        times = timeind

    frames = []
    for i in ids:
        df = pd.DataFrame.from_dict(vardict[i])
        df['Time'] = times
        df['Id'] = i  # scalar is broadcast to every row of this ID's frame
        frames.append(df[dfcols])  # fixed column order

    if not frames:
        return pd.DataFrame(columns=dfcols)

    # Combine once, after the loop, rather than re-concatenating and
    # re-sorting the growing result on every iteration.
    dff = pd.concat(frames, axis=0)
    # DataFrame.sort(columns=...) is deprecated and later removed;
    # sort_values(by=...) is its replacement.
    dff = dff.sort_values(by=['Id', 'Time'])
    # NOTE(review): this matches the *string* '-10' only. If 'Time' holds the
    # integer -10 it is left untouched -- confirm whether the baseline rows
    # were meant to be relabelled 'Baseline'.
    dff = dff.replace(to_replace='-10', value='Baseline')
    # Likewise only the literal string 'None' is turned into NaN here.
    dff = dff.replace(to_replace='None', value=np.nan)
    return dff

In [11]:
# Build the combined long-format table from the nested per-ID dict d.
dff = dfsfromdict(d)


/Users/John/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/IPython/kernel/__main__.py:24: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [12]:
# Index the table by 'Id'. Guarded on the index name so that re-running this
# cell after the index is already set is a no-op instead of a KeyError; a
# genuinely missing 'Id' column still raises.
if dff.index.name != 'Id':
    dff = dff.set_index('Id')

In [13]:
# Write the combined table to a CSV file in the current working directory.
dff.to_csv('ROP10sec_clean.csv') # NaN values are written out as blank fields

In [ ]:


In [ ]: