In [142]:
import os
import pandas as pd
# Display the absolute path of the current working directory.
os.path.abspath(os.path.curdir)
Out[142]:
In [143]:
# Collect the path of every transformed CSV file so the per-project tables
# can be loaded into dataframes and merged.
load_path = os.path.abspath(os.curdir)  # absolute path of the working directory
xfm_path = os.path.abspath('../transform')

# A project is a sub-directory of the transform folder. Names containing a
# dot are skipped exactly as before; the isdir() check additionally skips
# extensionless plain files, which would otherwise make os.listdir() fail
# in the loop below.
project_names = sorted(
    name for name in os.listdir(xfm_path)
    if '.' not in name and os.path.isdir(os.path.join(xfm_path, name))
)

# os.listdir() returns entries in arbitrary order, so both listings are
# sorted to keep the order of csv_files reproducible between runs.
csv_files = []
for project_name in project_names:
    xfm_dir = os.path.join(xfm_path, project_name)
    csv_files.extend(
        os.path.join(xfm_dir, file_name)
        for file_name in sorted(os.listdir(xfm_dir))
        # Match on the extension so names such as 'x.csv.bak' are ignored.
        if file_name.endswith('.csv')
    )
In [144]:
# Columns to keep in the final table, listed in the order they should appear.
columns = [
    'project', 'site_id', 'participant_id',
    'diagnosis', 'sex', 'age', 'handedness',
    'full iq', 'performance iq', 'verbal iq',
]
In [145]:
# Read every transformed CSV and stack the tables into a single dataframe.
if not csv_files:
    # pd.concat() rejects an empty list with a generic message; report what
    # is actually wrong instead (same exception type).
    raise ValueError('No transformed CSV files were found to merge.')
df_list = [pd.read_csv(csv_path) for csv_path in csv_files]
# ignore_index=True gives the merged frame a fresh 0..n-1 index; without it
# each file keeps its own row numbers and index labels repeat.
df = pd.concat(df_list, ignore_index=True)
# Write only the columns named in `columns`, in that order, without the index.
df.to_csv(os.path.join(load_path, 'phenotype.csv'), columns=columns, index=False)
df.head()
Out[145]:
In [193]:
# Load all.csv and index it by its 'id' column.
img_path = os.path.abspath('../clean-csv/all.csv')
# Read ids as text: type inference would turn an all-numeric id such as
# '0021002' into the integer 21002, and string lookups like
# img.loc['0021002'] would then fail.
img = pd.read_csv(img_path, dtype={'id': str})
# drop=False keeps 'id' as a regular column in addition to using it as index.
img = img.set_index('id', drop=False)
img.head()
Out[193]:
In [199]:
# For every id, count the rows that have a non-null value in each column, then
# attach the T1url count to each row as 't1_count'.
# Grouping uses the 'id' column itself rather than the label 'id': img is
# indexed with set_index('id', drop=False), so the bare label names both the
# index and a column, which newer pandas versions reject as ambiguous.
t1_count = img.groupby(img['id']).count()
# The assignment aligns on the index, so each row receives the count that
# belongs to its id (this relies on img being indexed by id).
img['t1_count'] = t1_count['T1url']
img.head()
Out[199]:
In [206]:
# Print the first row whose t1_count is greater than 1, then stop.
# The loop variable is called `row` so that it does not overwrite `df`, the
# merged dataframe built earlier in the notebook.
for idx, row in img[img.t1_count > 1].iterrows():
    # One pre-formatted string prints the same text under Python 2 and 3.
    print('%s %s %s' % (idx, row.T1url, row.t1_count))
    break
In [209]:
# Print every row stored under the id '0021002'.
# Selecting with a list always yields a DataFrame; a scalar label returns a
# Series when only one row matches, and a Series has no iterrows().
# The loop variable is called `row` so that it does not overwrite `df`, the
# merged dataframe built earlier in the notebook.
for idx, row in img.loc[['0021002']].iterrows():
    # One pre-formatted string prints the same text under Python 2 and 3.
    print('%s %s %s' % (idx, row.T1url, row.t1_count))
In [ ]: