In [158]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import Series
# Alternative value for main_path, currently disabled:
#main_path = './../../../Dropbox/McGill-publication/Papers/Multisite/'
# Presumably the root directory of the multisite project on this machine.
# NOTE(review): this is a machine-specific absolute path and no statement visible
# in this notebook reads `main_path` -- confirm whether it is still needed.
main_path = '/home/cdansereau/git/Projects/multisite/'
In [145]:
import os
from os import path
# Names of the demographic files to load; later cells index into `files` and
# read each entry with pandas.
# NOTE(review): this absolute path is machine-specific, and the reading loop
# prefixes the names with the relative directory 'demographic_data/' -- confirm
# that both refer to the same folder.
demog_dir = '/Users/christian/git/Projects/multisite/demographic_data/'
# os.listdir returns entries in arbitrary order, so sort them to make the load
# order reproducible. Sub-directories and hidden entries (names starting with
# '.') are skipped because they cannot be parsed as CSV.
files = sorted(
    name for name in os.listdir(demog_dir)
    if not name.startswith('.') and path.isfile(path.join(demog_dir, name))
)
In [195]:
# Read the index, age and sex columns (file columns 0, 2 and 3) from every
# entry of `files` and stack them into a single table, `all_demog`.
csv_columns = ['', 'age', 'sex']
site_frames = []
for file_name in files:
    path_csv = 'demographic_data/' + file_name
    # sep=None asks pandas to sniff the delimiter, which only the python
    # engine supports; naming the engine explicitly avoids the fallback warning.
    site_frames.append(
        pd.read_csv(path_csv, sep=None, engine='python', index_col=0,
                    names=csv_columns, usecols=[0, 2, 3])
    )
# Concatenate once instead of appending inside the loop; with no input files
# the result stays an empty DataFrame.
all_demog = pd.concat(site_frames) if site_frames else pd.DataFrame()
In [197]:
# Add the age and sex columns from `all_demog` to the 1000fcon subject table
# and save the result as 'demographic_1000fcon_consolidated.csv'.
path_csv = 'demographic_1000fcon.csv'
demograph = pd.read_csv(path_csv, index_col=0)

# Strip spaces from the subject labels. A relabelled frame is built instead of
# writing into `demograph.index.values`: an Index is meant to be immutable, and
# editing its backing array in place can leave label lookups out of date.
demograph = demograph.rename(index=lambda label: label.replace(' ', ''))
valid_list = demograph.index.values
print(len(valid_list))

# Remove duplicated labels, keeping the last occurrence of each one.
all_demog = all_demog[~all_demog.index.duplicated(keep='last')]
# Keep only the labels listed in the 1000fcon table, in that table's order.
all_demog = all_demog.loc[valid_list, :]
print('')

# Both assignments align on the index labels; sex is encoded as 1 for 'm' and
# 0 for any other value.
demograph['age'] = all_demog['age'].astype(int)
demograph['sex'] = (all_demog['sex'] == 'm').astype(int)
demograph.to_csv('demographic_1000fcon_consolidated.csv')
In [199]:
# Print summary statistics of the consolidated table, then draw one line plot
# per column, each in its own figure.
print(demograph.describe())
for column_name in ('FD ', 'age'):
    plt.figure()
    plt.plot(demograph[column_name])
Out[199]:
In [ ]: