Clean and Organize Data for the I-SPY1 Clinical Trial


In [1]:
# load the ispy1 cleaning module (written by Julio) and pandas
from ispy1 import clean_data
import pandas as pd

# Raw I-SPY 1 spreadsheet with the clinical and outcome data for all patients.
file = './data/I-SPY_1_All_Patient_Clinical_and_Outcome_Data.xlsx'

# Run the project's cleaning routine on the raw spreadsheet; the result is
# used below as a pandas DataFrame (.to_csv / .head).
df = clean_data.clean_my_data(file)

# Save the cleaned data to a new CSV file. The index is written too
# (pandas default), so it can be restored with index_col=0 on reload.
df.to_csv('./data/I-SPY_1_clean_data.csv')

# Preview the first two rows. Only the last expression of a cell is
# rendered, so a single trailing call is enough.
df.head(2)


Out[1]:
age White ER+ PR+ HR+ Bilateral Right_Breast MRI_LD_Baseline MRI_LD_1_3dAC MRI_LD_Int_Reg MRI_LD_PreSurg Alive Survival_length RFS RFS_code PCR RCB
SUBJECTID
1001 38.73 Yes Yes No Yes No No 88.0 78.0 30.0 14.0 No 1264 751 1 No 2.0
1002 37.79 Yes Yes Yes Yes No Yes 29.0 26.0 66.0 16.0 No 1155 1043 1 No 3.0