In [1]:
import pandas as pd
import numpy as np
import json
import string
In [2]:
df = pd.read_csv('/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv') # , index_col='SUB_ID'
df = df.sort_values(['SUB_ID'])
In [8]:
df
Out[8]:
In [ ]:
In [9]:
# saving the file paths
!find /home1/varunk/data/ABIDE1/RawDataBIDs/ -name 'task-rest_bold.json' > scan_params_file.txt
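The same list can be built without the shell (a sketch using Python's standard glob module, assuming the directory layout used by the find command above):

import glob
# Recursively collect every task-rest_bold.json sidecar under the raw-data root
# (same root as the find call above); sorted for a stable ordering.
scan_param_paths = sorted(glob.glob('/home1/varunk/data/ABIDE1/RawDataBIDs/**/task-rest_bold.json', recursive=True))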
In [10]:
# Read the file paths created above
with open('scan_params_file.txt', 'r') as f:
    scan_param_paths = f.read().split('\n')[0:-1]
In [11]:
scan_param_paths
Out[11]:
In [12]:
# for json_path in scan_param_paths:
# with open(json_path, 'rt') as fp:
# task_info = json.load(fp)
# # Accessing the contents:
# tr = task_info['RepetitionTime']
# volumes = task_info['NumberofMeasurements']
# xdim_mm, ydim_mm = task_info['PixelSpacing'].split('x')
# zdim_mm = task_info['SpacingBetweenSlices']
# xdim_voxels, ydim_voxels = task_info['AcquisitionMatrix'].split('x')
# zdim_voxels = task_info['NumberOfSlices']
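The same sidecar fields are parsed again inside the per-site loop below; a small reusable sketch of that parsing, assuming the ABIDE sidecars really carry these custom keys (RepetitionTime, NumberofMeasurements, 'x'-separated PixelSpacing and AcquisitionMatrix, SpacingBetweenSlices, NumberOfSlices):

def read_scan_params(json_path):
    # Sketch: read one sidecar and return the fields used in this notebook.
    # The keys below are assumptions carried over from the surrounding cells,
    # not standard BIDS fields.
    with open(json_path, 'rt') as fp:
        task_info = json.load(fp)
    xdim_mm, ydim_mm = task_info['PixelSpacing'].split('x')
    xdim_voxels, ydim_voxels = task_info['AcquisitionMatrix'].split('x')
    return {'tr': task_info['RepetitionTime'],
            'volumes': task_info['NumberofMeasurements'],
            'xdim_mm': xdim_mm, 'ydim_mm': ydim_mm,
            'zdim_mm': task_info['SpacingBetweenSlices'],
            'xdim_voxels': xdim_voxels, 'ydim_voxels': ydim_voxels,
            'zdim_voxels': task_info['NumberOfSlices']}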
In [ ]:
In [13]:
SITES = np.unique(df.as_matrix(['SITE_ID']).squeeze())
data_frame = pd.DataFrame({
'SITE_NAME': [] ,
'TR': [],
'VOLUMES': [],
'xdim_mm': [],
'ydim_mm': [],
'zdim_mm': [],
'xdim_voxels': [],
'ydim_voxels': [],
'zdim_voxels': [],
'NUM_AUT_DSM_V': [] ,
'NUM_AUT_MALE_DSM_V': [] ,
'NUM_AUT_FEMALE_DSM_V': [],
'NUM_AUT_AGE_lte12_DSM_V' : [],
'NUM_AUT_AGE_12_18_DSM_V' : [],
'NUM_AUT_AGE_18_24_DSM_V': [],
'NUM_AUT_AGE_24_34_DSM_V' :[],
'NUM_AUT_AGE_34_50_DSM_V' : [],
'NUM_AUT_AGE_gt50_DSM_V' : [],
'NUM_AUT_DSM_IV' : [],
'NUM_AUT_MALE_DSM_IV' : [],
'NUM_AUT_FEMALE_DSM_IV' : [],
'NUM_ASP_DSM_IV' : [],
'NUM_ASP_MALE_DSM_IV' : [],
'NUM_ASP_FEMALE_DSM_IV' : [],
'NUM_PDDNOS_DSM_IV' : [],
'NUM_PDDNOS_MALE_DSM_IV' : [],
'NUM_PDDNOS_FEMALE_DSM_IV' : [],
'NUM_ASP_PDDNOS_DSM_IV' : [],
'NUM_ASP_PDDNOS_MALE_DSM_IV' : [],
'NUM_ASP_PDDNOS_FEMALE_DSM_IV' : [],
'NUM_TD' : [],
'NUM_TD_MALE' : [],
'NUM_TD_FEMALE' : [],
'NUM_TD_AGE_lte12' : [],
'NUM_TD_AGE_12_18' : [],
'NUM_TD_AGE_18_24' : [],
'NUM_TD_AGE_24_34' : [],
'NUM_TD_AGE_34_50' : [],
'NUM_TD_AGE_gt50' : []
})
In [14]:
# NUM_AUT =
# df.loc[(df['DSM_IV_TR'] != 0) & (df['DSM_IV_TR'] != 1) & (df['DSM_IV_TR'] != 2) & (df['DSM_IV_TR'] != 3) & (df['DSM_IV_TR'] != 4)]
for SITE in SITES:
    NUM_AUT_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_MALE_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['SEX'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_FEMALE_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['SEX'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_AGE_lte12_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['AGE_AT_SCAN'] <= 12) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_AGE_12_18_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['AGE_AT_SCAN'] > 12) & (df['AGE_AT_SCAN'] <= 18) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_AGE_18_24_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['AGE_AT_SCAN'] > 18) & (df['AGE_AT_SCAN'] <= 24) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_AGE_24_34_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['AGE_AT_SCAN'] > 24) & (df['AGE_AT_SCAN'] <= 34) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_AGE_34_50_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['AGE_AT_SCAN'] > 34) & (df['AGE_AT_SCAN'] <= 50) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_AGE_gt50_DSM_V = df.loc[(df['DX_GROUP'] == 1) & (df['AGE_AT_SCAN'] > 50) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_DSM_IV = df.loc[(df['DSM_IV_TR'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_MALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 1) & (df['SEX'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_AUT_FEMALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 1) & (df['SEX'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_ASP_DSM_IV = df.loc[(df['DSM_IV_TR'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_ASP_MALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 2) & (df['SEX'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_ASP_FEMALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 2) & (df['SEX'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_PDDNOS_DSM_IV = df.loc[(df['DSM_IV_TR'] == 3) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_PDDNOS_MALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 3) & (df['SEX'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_PDDNOS_FEMALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 3) & (df['SEX'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_ASP_PDDNOS_DSM_IV = df.loc[(df['DSM_IV_TR'] == 4) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_ASP_PDDNOS_MALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 4) & (df['SEX'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_ASP_PDDNOS_FEMALE_DSM_IV = df.loc[(df['DSM_IV_TR'] == 4) & (df['SEX'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD = df.loc[(df['DX_GROUP'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_MALE = df.loc[(df['DX_GROUP'] == 2) & (df['SEX'] == 1) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_FEMALE = df.loc[(df['DX_GROUP'] == 2) & (df['SEX'] == 2) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_AGE_lte12 = df.loc[(df['DX_GROUP'] == 2) & (df['AGE_AT_SCAN'] <= 12) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_AGE_12_18 = df.loc[(df['DX_GROUP'] == 2) & (df['AGE_AT_SCAN'] > 12) & (df['AGE_AT_SCAN'] <= 18) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_AGE_18_24 = df.loc[(df['DX_GROUP'] == 2) & (df['AGE_AT_SCAN'] > 18) & (df['AGE_AT_SCAN'] <= 24) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_AGE_24_34 = df.loc[(df['DX_GROUP'] == 2) & (df['AGE_AT_SCAN'] > 24) & (df['AGE_AT_SCAN'] <= 34) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_AGE_34_50 = df.loc[(df['DX_GROUP'] == 2) & (df['AGE_AT_SCAN'] > 34) & (df['AGE_AT_SCAN'] <= 50) & (df['SITE_ID'] == SITE)].shape[0]
    NUM_TD_AGE_gt50 = df.loc[(df['DX_GROUP'] == 2) & (df['AGE_AT_SCAN'] > 50) & (df['SITE_ID'] == SITE)].shape[0]

    tr = 0
    volumes = 0
    xdim_mm = 0
    ydim_mm = 0
    zdim_mm = 0
    xdim_voxels = 0
    ydim_voxels = 0
    zdim_voxels = 0

    # Accessing scan details
    for json_path in scan_param_paths:
        extracted_site = json_path.split('/')[-2]
        if SITE.lower() in extracted_site.lower():
            with open(json_path, 'rt') as fp:
                print('Site matched with ', json_path)
                task_info = json.load(fp)
                # Accessing the contents:
                tr = task_info['RepetitionTime']
                volumes = task_info['NumberofMeasurements']
                xdim_mm, ydim_mm = task_info['PixelSpacing'].split('x')
                zdim_mm = task_info['SpacingBetweenSlices']
                xdim_voxels, ydim_voxels = task_info['AcquisitionMatrix'].split('x')
                zdim_voxels = task_info['NumberOfSlices']

    _df = pd.DataFrame({
        'SITE_NAME': SITE,
        'TR': tr,
        'VOLUMES': volumes,
        'xdim_mm': xdim_mm,
        'ydim_mm': ydim_mm,
        'zdim_mm': zdim_mm,
        'xdim_voxels': xdim_voxels,
        'ydim_voxels': ydim_voxels,
        'zdim_voxels': zdim_voxels,
        'NUM_AUT_DSM_V': NUM_AUT_DSM_V,
        'NUM_AUT_MALE_DSM_V': NUM_AUT_MALE_DSM_V,
        'NUM_AUT_FEMALE_DSM_V': NUM_AUT_FEMALE_DSM_V,
        'NUM_AUT_AGE_lte12_DSM_V': NUM_AUT_AGE_lte12_DSM_V,
        'NUM_AUT_AGE_12_18_DSM_V': NUM_AUT_AGE_12_18_DSM_V,
        'NUM_AUT_AGE_18_24_DSM_V': NUM_AUT_AGE_18_24_DSM_V,
        'NUM_AUT_AGE_24_34_DSM_V': NUM_AUT_AGE_24_34_DSM_V,
        'NUM_AUT_AGE_34_50_DSM_V': NUM_AUT_AGE_34_50_DSM_V,
        'NUM_AUT_AGE_gt50_DSM_V': NUM_AUT_AGE_gt50_DSM_V,
        'NUM_AUT_DSM_IV': NUM_AUT_DSM_IV,
        'NUM_AUT_MALE_DSM_IV': NUM_AUT_MALE_DSM_IV,
        'NUM_AUT_FEMALE_DSM_IV': NUM_AUT_FEMALE_DSM_IV,
        'NUM_ASP_DSM_IV': NUM_ASP_DSM_IV,
        'NUM_ASP_MALE_DSM_IV': NUM_ASP_MALE_DSM_IV,
        'NUM_ASP_FEMALE_DSM_IV': NUM_ASP_FEMALE_DSM_IV,
        'NUM_PDDNOS_DSM_IV': NUM_PDDNOS_DSM_IV,
        'NUM_PDDNOS_MALE_DSM_IV': NUM_PDDNOS_MALE_DSM_IV,
        'NUM_PDDNOS_FEMALE_DSM_IV': NUM_PDDNOS_FEMALE_DSM_IV,
        'NUM_ASP_PDDNOS_DSM_IV': NUM_ASP_PDDNOS_DSM_IV,
        'NUM_ASP_PDDNOS_MALE_DSM_IV': NUM_ASP_PDDNOS_MALE_DSM_IV,
        'NUM_ASP_PDDNOS_FEMALE_DSM_IV': NUM_ASP_PDDNOS_FEMALE_DSM_IV,
        'NUM_TD': NUM_TD,
        'NUM_TD_MALE': NUM_TD_MALE,
        'NUM_TD_FEMALE': NUM_TD_FEMALE,
        'NUM_TD_AGE_lte12': NUM_TD_AGE_lte12,
        'NUM_TD_AGE_12_18': NUM_TD_AGE_12_18,
        'NUM_TD_AGE_18_24': NUM_TD_AGE_18_24,
        'NUM_TD_AGE_24_34': NUM_TD_AGE_24_34,
        'NUM_TD_AGE_34_50': NUM_TD_AGE_34_50,
        'NUM_TD_AGE_gt50': NUM_TD_AGE_gt50
    }, index=[0], columns=[
        'SITE_NAME', 'TR', 'VOLUMES',
        'xdim_mm', 'ydim_mm', 'zdim_mm',
        'xdim_voxels', 'ydim_voxels', 'zdim_voxels',
        'NUM_AUT_DSM_V', 'NUM_AUT_MALE_DSM_V', 'NUM_AUT_FEMALE_DSM_V',
        'NUM_AUT_AGE_lte12_DSM_V', 'NUM_AUT_AGE_12_18_DSM_V', 'NUM_AUT_AGE_18_24_DSM_V',
        'NUM_AUT_AGE_24_34_DSM_V', 'NUM_AUT_AGE_34_50_DSM_V', 'NUM_AUT_AGE_gt50_DSM_V',
        'NUM_AUT_DSM_IV', 'NUM_AUT_MALE_DSM_IV', 'NUM_AUT_FEMALE_DSM_IV',
        'NUM_ASP_DSM_IV', 'NUM_ASP_MALE_DSM_IV', 'NUM_ASP_FEMALE_DSM_IV',
        'NUM_PDDNOS_DSM_IV', 'NUM_PDDNOS_MALE_DSM_IV', 'NUM_PDDNOS_FEMALE_DSM_IV',
        'NUM_ASP_PDDNOS_DSM_IV', 'NUM_ASP_PDDNOS_MALE_DSM_IV', 'NUM_ASP_PDDNOS_FEMALE_DSM_IV',
        'NUM_TD', 'NUM_TD_MALE', 'NUM_TD_FEMALE',
        'NUM_TD_AGE_lte12', 'NUM_TD_AGE_12_18', 'NUM_TD_AGE_18_24',
        'NUM_TD_AGE_24_34', 'NUM_TD_AGE_34_50', 'NUM_TD_AGE_gt50'])

    data_frame = data_frame.append(_df, ignore_index=True)[_df.columns.tolist()]
# df = pd.DataFrame(raw_data, columns = [])
# Sanity Check
# NUM_AUT_DSM_V.shape[0] + NUM_TD.shape[0]
# df.loc[(df['DSM_IV_TR'] == 0)].shape[0] + NUM_AUT_DSM_V.shape[0] # Not exhaustive
In [ ]:
# 'MAX_MUN'.lower() in '/home1/varunk/data/ABIDE1/RawDataBIDs/MaxMun_a/task-rest_bold.json'.lower()
In [ ]:
_df
In [ ]:
data_frame
In [ ]:
# Save the csv file
data_frame.to_csv('demographics.csv')
In [15]:
# df = pd.read_csv('/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv') # , index_col='SUB_ID'
# df = df.sort_values(['SUB_ID'])
# df_td_lt18_m_eyesopen = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 0) & (df['EYE_STATUS_AT_SCAN'] == 1)]
# df_td_lt18_m_eyesopen;
# df_td_lt18_m_eyesclosed = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 0) & (df['EYE_STATUS_AT_SCAN'] == 2)]
# df_td_lt18_m_eyesclosed;
In [16]:
# df_td_lt18_m_eyesopen;
# df_td_lt18_m_eyesclosed;
In [17]:
# Reading TR values
tr_path = '/home1/varunk/results_again_again/ABIDE1_Preprocess_Datasink/tr_paths/tr_list.npy'
tr = np.load(tr_path)
In [18]:
np.unique(tr)
Out[18]:
In [19]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
bins = np.arange(0,3.5,0.1)
res = plt.hist(tr, rwidth=0.3, align='left', bins= bins)
# plt.xticks([0,0.5,1,1.5,2,2.5,3])
plt.xlabel('TR')
plt.ylabel('Number of participants')
plt.title('Frequency distribution of TRs')
# plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
Out[19]:
In [20]:
np.unique(tr)
Out[20]:
In [21]:
df = pd.read_csv('/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv') # , index_col='SUB_ID'
df = df.sort_values(['SUB_ID'])
df_td_lt18_m_eyesopen = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 0) & (df['EYE_STATUS_AT_SCAN'] == 1)]
df_td_lt18_m_eyesopen;
df_td_lt18_m_eyesclosed = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 0) & (df['EYE_STATUS_AT_SCAN'] == 2)]
df_td_lt18_m_eyesclosed;
df_aut_lt18_m_eyesopen = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 1) & (df['EYE_STATUS_AT_SCAN'] == 1)]
df_aut_lt18_m_eyesopen;
df_aut_lt18_m_eyesclosed = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 1) & (df['EYE_STATUS_AT_SCAN'] == 2)]
df_aut_lt18_m_eyesclosed;
In [22]:
df_td_lt18_m_eyesopen_sub_id = df_td_lt18_m_eyesopen.as_matrix(['SUB_ID']).squeeze()
df_td_lt18_m_eyesclosed_sub_id = df_td_lt18_m_eyesclosed.as_matrix(['SUB_ID']).squeeze()
df_aut_lt18_m_eyesopen_sub_id = df_aut_lt18_m_eyesopen.as_matrix(['SUB_ID']).squeeze()
df_aut_lt18_m_eyesclosed_sub_id = df_aut_lt18_m_eyesclosed.as_matrix(['SUB_ID']).squeeze()
In [23]:
import re
sub_id = []
atlas_paths = np.load('/home1/varunk/results_again_again/ABIDE1_Preprocess_Datasink/atlas_paths/atlas_file_list.npy')
for path in atlas_paths:
    sub_id_extracted = re.search('.+_subject_id_(\d+)', path).group(1)
    sub_id.append(sub_id_extracted)
In [24]:
sub_id = list(map(int, sub_id))
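Before tr is indexed with masks derived from sub_id, it helps to confirm the two arrays line up; a minimal check, assuming tr_list.npy and atlas_file_list.npy were written by the same pipeline run with one entry per subject:

# Assumption: both .npy files come from the same Datasink run, one row per subject.
assert len(sub_id) == len(tr), 'sub_id and tr differ in length; the masks below would misalign'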
In [25]:
# df_sub_id = df.as_matrix(['SUB_ID']).squeeze()
In [26]:
# Number of TD subjects with Age 12 to 18
df_td_lt18_m_eyesopen = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] >=12) &(df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 0) & (df['EYE_STATUS_AT_SCAN'] == 1)]
df_td_lt18_m_eyesopen.shape
Out[26]:
In [27]:
# Number of Autistic subjects with Age 12 to 18
df_aut_lt18_m_eyesopen = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] >=12) &(df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 1) & (df['EYE_STATUS_AT_SCAN'] == 1)]
df_aut_lt18_m_eyesopen.shape
Out[27]:
In [28]:
# tr[np.where(df_sub_id == df_td_lt18_m_eyesopen_sub_id)]
In [29]:
# np.isin(sub_id,df_td_lt18_m_eyesopen_sub_id)
In [30]:
tr1 = tr[np.isin(sub_id, df_aut_lt18_m_eyesopen_sub_id)]
bins = np.arange(0,3.5,0.1)
res = plt.hist(tr1, rwidth=0.3, align='left', bins= bins)
# plt.xticks([0,0.5,1,1.5,2,2.5,3])
plt.xlabel('TR')
plt.ylabel('Number of participants')
plt.title('Frequency distribution of TRs')
Out[30]:
In [31]:
tr2 = tr[np.isin(sub_id, df_td_lt18_m_eyesopen_sub_id)]
bins = np.arange(0,3.5,0.1)
res = plt.hist(tr2, rwidth=0.3, align='left', bins= bins)
# plt.xticks([0,0.5,1,1.5,2,2.5,3])
plt.xlabel('TR')
plt.ylabel('Number of participants')
plt.title('Frequency distribution of TRs')
Out[31]:
In [ ]:
In [ ]:
In [32]:
tr3 = tr[np.isin(sub_id, df_aut_lt18_m_eyesclosed_sub_id)]
bins = np.arange(0,3.5,0.1)
res = plt.hist(tr3, rwidth=0.3, align='left', bins= bins)
# plt.xticks([0,0.5,1,1.5,2,2.5,3])
plt.xlabel('TR')
plt.ylabel('Number of participants')
plt.title('Frequency distribution of TRs')
Out[32]:
In [33]:
tr4 = tr[np.isin(sub_id, df_td_lt18_m_eyesclosed_sub_id)]
bins = np.arange(0,3.5,0.1)
res = plt.hist(tr4, rwidth=0.3, align='left', bins= bins)
# plt.xticks([0,0.5,1,1.5,2,2.5,3])
plt.xlabel('TR')
plt.ylabel('Number of participants')
plt.title('Frequency distribution of TRs')
Out[33]:
In [34]:
df_td_lt18_m_eyesopen_age = df_td_lt18_m_eyesopen.as_matrix(['AGE_AT_SCAN']).squeeze()
df_td_lt18_m_eyesclosed_age = df_td_lt18_m_eyesclosed.as_matrix(['AGE_AT_SCAN']).squeeze()
df_aut_lt18_m_eyesopen_age = df_aut_lt18_m_eyesopen.as_matrix(['AGE_AT_SCAN']).squeeze()
df_aut_lt18_m_eyesclosed_age = df_aut_lt18_m_eyesclosed.as_matrix(['AGE_AT_SCAN']).squeeze()
bins = np.arange(0,20,1)
# res = plt.hist(df_td_lt18_m_eyesopen_age, rwidth=0.3, align='left')
# res2 = plt.hist(df_aut_lt18_m_eyesopen_age, rwidth=0.3, align='left', bins= bins)
# # plt.xticks([0,0.5,1,1.5,2,2.5,3])
# plt.xlabel('TR')
# plt.ylabel('Number of participants')
# plt.title('Frequency distribution of TRs')
# import random
# import numpy
from matplotlib import pyplot
# x = [random.gauss(3,1) for _ in range(400)]
# y = [random.gauss(4,2) for _ in range(400)]
# bins = numpy.linspace(-10, 10, 100)
pyplot.hist(df_td_lt18_m_eyesopen_age, alpha=0.5,bins=bins, label='TD',rwidth=0.1, align='left')
pyplot.hist(df_aut_lt18_m_eyesopen_age,alpha=0.5, bins=bins, label='AUT',rwidth=0.1,align='right')
pyplot.legend(loc='upper right')
pyplot.xlabel('AGE')
pyplot.show()
In [35]:
pyplot.hist(df_td_lt18_m_eyesclosed_age, alpha=0.5,bins=bins, label='TD',rwidth=0.1, align='left')
pyplot.hist(df_aut_lt18_m_eyesclosed_age,alpha=0.5, bins=bins, label='AUT',rwidth=0.1,align='right')
pyplot.legend(loc='upper right')
pyplot.xlabel('AGE')
pyplot.show()
In [36]:
pyplot.yticks(np.arange(0,20,1))
res = pyplot.boxplot([df_td_lt18_m_eyesopen_age,df_aut_lt18_m_eyesopen_age])
In [37]:
pyplot.yticks(np.arange(0,20,1))
res = pyplot.boxplot([df_td_lt18_m_eyesclosed_age, df_aut_lt18_m_eyesclosed_age])
In [38]:
eyes_open_age = np.concatenate((df_td_lt18_m_eyesopen_age,df_aut_lt18_m_eyesopen_age))
eyes_closed_age = np.concatenate((df_td_lt18_m_eyesclosed_age,df_aut_lt18_m_eyesclosed_age))
pyplot.yticks(np.arange(0,20,1))
res = pyplot.boxplot([eyes_open_age, eyes_closed_age])
In [50]:
from scipy import stats
print(stats.ttest_ind(eyes_open_age,eyes_closed_age, equal_var = False))
print('Mean: ',np.mean(eyes_open_age), np.mean(eyes_closed_age))
print('Std: ',np.std(eyes_open_age), np.std(eyes_closed_age))
In [49]:
# stats.ttest_ind(eyes_open_age,eyes_closed_age, equal_var = False)
eyes_open_tr = np.concatenate((tr1,tr2))
eyes_closed_tr = np.concatenate((tr3,tr4))
print(stats.ttest_ind(eyes_open_tr,eyes_closed_tr, equal_var = False))
print('Mean: ',np.mean(eyes_open_tr), np.mean(eyes_closed_tr))
print('Std: ',np.std(eyes_open_tr), np.std(eyes_closed_tr))
In [46]:
print(stats.ttest_ind(df_aut_lt18_m_eyesopen_age, df_td_lt18_m_eyesopen_age, equal_var = False))
print('Mean: ',np.mean(df_aut_lt18_m_eyesopen_age), np.mean(df_td_lt18_m_eyesopen_age))
print('Std: ',np.std(df_aut_lt18_m_eyesopen_age), np.std(df_td_lt18_m_eyesopen_age))
In [47]:
print(stats.ttest_ind(df_aut_lt18_m_eyesclosed_age, df_td_lt18_m_eyesclosed_age, equal_var = False))
print('Mean: ',np.mean(df_aut_lt18_m_eyesclosed_age),np.mean(df_td_lt18_m_eyesclosed_age))
print('Std: ',np.std(df_aut_lt18_m_eyesclosed_age),np.std(df_td_lt18_m_eyesclosed_age))
Motion parameter column order, per the FSL mailing-list post at https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=fsl;cda6e2ea.1112:
rot_x, rot_y, rot_z, trans_x, trans_y, trans_z
In [48]:
motion_params_npy = '/home1/varunk/results_again_again/ABIDE1_Preprocess_Datasink/motion_params_paths/motion_params_file_list.npy'
mot_params_paths = np.load(motion_params_npy)
In [ ]:
in_file = mot_params_paths[0]
trans_x = []
trans_y = []
trans_z = []
rot_x = []
rot_y = []
rot_z = []
# for in_file in mot_params_paths:
with open(in_file) as f:
    for line in f:
        # split(' ') leaves empty strings where the file uses double spaces,
        # so the values sit at even indices: 0, 2, 4 are the rotations and
        # 6, 8, 10 are the translations (matching the column order noted above).
        line = line.split(' ')
        print(line)
        trans_x.append(float(line[6]))
        trans_y.append(float(line[8]))
        trans_z.append(float(line[10]))
        rot_x.append(float(line[0]))
        rot_y.append(float(line[2]))
        rot_z.append(float(line[4]))
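A more compact alternative (a sketch, assuming the .par files are plain whitespace-delimited text with six columns in the order noted above) lets NumPy do the parsing:

# np.loadtxt collapses repeated whitespace, giving a (timepoints x 6) array.
params = np.loadtxt(in_file)
rot_x, rot_y, rot_z, trans_x, trans_y, trans_z = params.T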
In [ ]:
float('0.0142863')
In [ ]:
max(rot_y)
In [172]:
# Load demographics file
df_demographics = pd.read_csv('/home1/varunk/Autism-Connectome-Analysis-brain_connectivity/notebooks/demographics.csv')
# df_demographics
df_demographics_volumes = df_demographics.as_matrix(['SITE_NAME','VOLUMES']).squeeze()
df_demographics_volumes
Out[172]:
In [175]:
df_phenotype = pd.read_csv('/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv') # , index_col='SUB_ID'
df_phenotype = df_phenotype.sort_values(['SUB_ID'])
volumes_bins = np.array([[0,150],[151,200],[201,250],[251,300]])
bins_volumes_AUT = []
bins_volumes_TD = []
for counter, _bin in enumerate(volumes_bins):
    df_demographics_volumes_selected_bin = df_demographics_volumes[np.where(np.logical_and((df_demographics_volumes[:,1] >= _bin[0]), (df_demographics_volumes[:,1] <= _bin[1])))]
    selected_AUT = pd.DataFrame()
    selected_TD = pd.DataFrame()
    for site in df_demographics_volumes_selected_bin:
        print(site[0])
        selected_AUT = pd.concat([selected_AUT, df_phenotype.loc[(df_phenotype['SEX'] == 1) & (df_phenotype['DSM_IV_TR'] == 1) & (df_phenotype['SITE_ID'] == site[0])]])
        selected_TD = pd.concat([selected_TD, df_phenotype.loc[(df_phenotype['SEX'] == 1) & (df_phenotype['DSM_IV_TR'] == 0) & (df_phenotype['SITE_ID'] == site[0])]])
    bins_volumes_AUT.append(selected_AUT)
    bins_volumes_TD.append(selected_TD)
In [176]:
f = bins_volumes_AUT[0]
# f.loc[[2,3,4,5]]
f
Out[176]:
In [177]:
f.iloc[[2,3,4,5,7]]
Out[177]:
In [178]:
# num_bins = 4
print('Range ','TD ','AUT ','Ratio TD/AUT')
ratio = np.zeros((len(bins_volumes_AUT)))
for i in range(len(bins_volumes_AUT)):
    ratio[i] = bins_volumes_TD[i].shape[0]/bins_volumes_AUT[i].shape[0]
    print(volumes_bins[i], bins_volumes_TD[i].shape[0], bins_volumes_AUT[i].shape[0], ratio[i])
In [179]:
min_ratio = np.min(ratio)
min_index = np.argmin(ratio)
In [180]:
new_TD = np.zeros((len(bins_volumes_AUT)))
print('Range ','TD ','AUT ')
for i in range(len(bins_volumes_AUT)):
    new_TD[i] = np.ceil(bins_volumes_AUT[i].shape[0] * min_ratio)
    print(volumes_bins[i], new_TD[i], bins_volumes_AUT[i].shape[0])
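A tiny worked example of the matching rule with made-up counts (hypothetical numbers, not from the data): the smallest per-bin TD/AUT ratio is applied to every bin, so each bin keeps ceil(AUT_count * min_ratio) TD subjects.

# Hypothetical counts per bin, only to illustrate the arithmetic above.
aut_counts = np.array([40, 60, 50])
td_counts = np.array([80, 90, 60])
min_ratio = np.min(td_counts / aut_counts)      # 60/50 = 1.2
matched_td = np.ceil(aut_counts * min_ratio)    # [48., 72., 60.]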
In [182]:
# Now loop over all the bins created and select the specific number of subjects randomly from each TD bin
TD_idx_list = []
selected_df_TD = pd.DataFrame()
for i in range(len(bins_volumes_TD)):
    idx = np.arange(len(bins_volumes_TD[i]))
    np.random.shuffle(idx)
    idx = idx[0:int(new_TD[i])]
    TD_idx_list.append(idx)
    selected_df_TD = pd.concat([selected_df_TD, bins_volumes_TD[i].iloc[idx]])
selected_df_TD = selected_df_TD.sort_values(['SUB_ID'])
# print(idx)
In [184]:
# Sanity check that no subjects are repeated
# subid = selected_df_TD.sort_values(['SUB_ID']).as_matrix(['SUB_ID']).squeeze()
# len(np.unique(subid)) == len(subid)
Out[184]:
In [185]:
# Sanity check that the number of subjects is as expected
# len(subid) == (89 + 105 + 109 + 56)
Out[185]:
In [189]:
# Sanity check that no subject index is repeated
# len(np.unique(TD_idx_list[3])) == len(TD_idx_list[3])
# Sanity check: the new number of TD subjects in each volumes bin
# len(TD_idx_list[3]) == 56
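The commented checks above can be run directly; a minimal sketch, assuming selected_df_TD and TD_idx_list from the preceding cells are still in scope (the hard-coded totals are left out because they depend on the random draw):

# Uniqueness checks for the matched TD selection.
subid = selected_df_TD.sort_values(['SUB_ID']).as_matrix(['SUB_ID']).squeeze()
print('No repeated subjects:', len(np.unique(subid)) == len(subid))
print('No repeated indices within any bin:', all(len(np.unique(idx)) == len(idx) for idx in TD_idx_list))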
In [191]:
selected_df_TD
Out[191]:
In [193]:
age_bins = np.array([[0,9],[9,12],[12,15],[15,18]])
bins_age_AUT = []
bins_age_TD = []
# for counter, _bin in enumerate(age_bins):
for age in age_bins:
    selected_AUT = pd.DataFrame()
    selected_TD = pd.DataFrame()
    print(age[0], age[1])
    selected_AUT = pd.concat([selected_AUT, df_phenotype.loc[(df_phenotype['SEX'] == 1)
                                                             & (df_phenotype['DSM_IV_TR'] == 1)
                                                             & (df_phenotype['AGE_AT_SCAN'] > age[0])
                                                             & (df_phenotype['AGE_AT_SCAN'] <= age[1])]])
    selected_TD = pd.concat([selected_TD, selected_df_TD.loc[(selected_df_TD['SEX'] == 1)
                                                             & (selected_df_TD['DSM_IV_TR'] == 0)
                                                             & (selected_df_TD['AGE_AT_SCAN'] > age[0])
                                                             & (selected_df_TD['AGE_AT_SCAN'] <= age[1])]])
    bins_age_AUT.append(selected_AUT)
    bins_age_TD.append(selected_TD)
In [194]:
bins_age_TD[0]
Out[194]:
In [195]:
# num_bins = 4
print('Original data stats')
print('Age Range ','TD ','AUT ','Ratio TD/AUT')
ratio = np.zeros((len(bins_age_TD)))
for i in range(len(bins_age_TD)):
    ratio[i] = bins_age_TD[i].shape[0]/bins_age_AUT[i].shape[0]
    print(age_bins[i], bins_age_TD[i].shape[0], bins_age_AUT[i].shape[0], ratio[i])
In [168]:
min_ratio = np.min(ratio)
min_index = np.argmin(ratio)
In [169]:
new_TD = np.zeros((len(bins_age_AUT)))
print('Matched data stats')
print('Age Range ','TD ','AUT ')
for i in range(len(bins_age_AUT)):
    new_TD[i] = np.ceil(bins_age_AUT[i].shape[0] * min_ratio)
    print(age_bins[i], new_TD[i], bins_age_AUT[i].shape[0])
In [170]:
# Now loop over all the bins created and select the specific number of subjects randomly from each TD bin
TD_idx_list = []
selected_df_TD = pd.DataFrame()
for i in range(len(bins_age_TD)):
    idx = np.arange(len(bins_age_TD[i]))
    np.random.shuffle(idx)
    idx = idx[0:int(new_TD[i])]
    TD_idx_list.append(idx)
    selected_df_TD = pd.concat([selected_df_TD, bins_age_TD[i].iloc[idx]])
selected_df_TD = selected_df_TD.sort_values(['SUB_ID'])
# print(idx)
In [215]:
selected_df_TD
# selected_df_TD.as_matrix(['SUB_ID']).squeeze()
Out[215]:
In [ ]:
In [104]:
x = np.arange(10)
np.random.shuffle(x)
In [105]:
x
Out[105]:
In [80]:
48 * min_ratio
Out[80]:
In [ ]:
# selected = selected.loc[(selected['SEX'] == 1) & (selected['DSM_IV_TR'] == 0) & (selected['SITE_ID'] == site[0]) & (selected['EYE_STATUS_AT_SCAN'] == 1)]
In [196]:
selected;
In [44]:
df_phenotype.loc[(df_phenotype['SEX'] == 1) & (df_phenotype['DSM_IV_TR'] == 0) & (df_phenotype['SITE_ID'] == 'TRINITY') & (df_phenotype['EYE_STATUS_AT_SCAN'] == 1)]
Out[44]:
In [225]:
def volumes_matching(volumes_bins, demographics_file_path, phenotype_file_path):
    # Load demographics file
    # demographics_file_path = '/home1/varunk/Autism-Connectome-Analysis-brain_connectivity/notebooks/demographics.csv'
    # phenotype_file_path = '/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv'
    # volumes_bins = np.array([[0,150],[151,200],[201,250],[251,300]])
    df_demographics = pd.read_csv(demographics_file_path)
    df_demographics_volumes = df_demographics.as_matrix(['SITE_NAME','VOLUMES']).squeeze()
    df_phenotype = pd.read_csv(phenotype_file_path)
    df_phenotype = df_phenotype.sort_values(['SUB_ID'])
    bins_volumes_AUT_data = []
    bins_volumes_TD_data = []
    for counter, _bin in enumerate(volumes_bins):
        df_demographics_volumes_selected_bin = df_demographics_volumes[np.where(np.logical_and((df_demographics_volumes[:,1] >= _bin[0]), (df_demographics_volumes[:,1] <= _bin[1])))]
        selected_AUT = pd.DataFrame()
        selected_TD = pd.DataFrame()
        for site in df_demographics_volumes_selected_bin:
            # print(site[0])
            selected_AUT = pd.concat([selected_AUT, df_phenotype.loc[(df_phenotype['SEX'] == 1) & (df_phenotype['DSM_IV_TR'] == 1) & (df_phenotype['SITE_ID'] == site[0])]])
            selected_TD = pd.concat([selected_TD, df_phenotype.loc[(df_phenotype['SEX'] == 1) & (df_phenotype['DSM_IV_TR'] == 0) & (df_phenotype['SITE_ID'] == site[0])]])
        bins_volumes_AUT_data.append(selected_AUT)
        bins_volumes_TD_data.append(selected_TD)
    selected_df_TD = matching(volumes_bins, bins_volumes_TD_data, bins_volumes_AUT_data)
    # sub_ids = selected_df_TD.as_matrix(['SUB_ID']).squeeze()
    selected_df_TD.to_csv('selected_TD.csv')
    return selected_df_TD
In [226]:
def matching(bins, bins_TD_data, bins_AUT_data):
    # num_bins = 4
    print('Original data stats')
    print('Range ','TD ','AUT ','Ratio TD/AUT')
    ratio = np.zeros((len(bins_TD_data)))
    for i in range(len(bins_TD_data)):
        ratio[i] = bins_TD_data[i].shape[0]/bins_AUT_data[i].shape[0]
        print(bins[i], bins_TD_data[i].shape[0], bins_AUT_data[i].shape[0], ratio[i])
    min_ratio = np.min(ratio)
    min_index = np.argmin(ratio)
    new_TD = np.zeros((len(bins_TD_data)))
    print('Matched data stats')
    print('Range ','TD ','AUT ')
    for i in range(len(bins_TD_data)):
        new_TD[i] = np.ceil(bins_AUT_data[i].shape[0] * min_ratio)
        print(bins[i], new_TD[i], bins_AUT_data[i].shape[0])
    # Now loop over all the bins created and select the specific number of subjects randomly from each TD bin
    TD_idx_list = []
    selected_df_TD = pd.DataFrame()
    for i in range(len(bins_TD_data)):
        idx = np.arange(len(bins_TD_data[i]))
        np.random.shuffle(idx)
        idx = idx[0:int(new_TD[i])]
        TD_idx_list.append(idx)
        selected_df_TD = pd.concat([selected_df_TD, bins_TD_data[i].iloc[idx]])
    selected_df_TD = selected_df_TD.sort_values(['SUB_ID'])
    return selected_df_TD
In [222]:
demographics_file_path = '/home1/varunk/Autism-Connectome-Analysis-brain_connectivity/notebooks/demographics.csv'
phenotype_file_path = '/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv'
volumes_bins = np.array([[0,150],[151,200],[201,250],[251,300]])
volumes_matching(volumes_bins, demographics_file_path, phenotype_file_path)
Out[222]:
In [ ]:
In [197]:
df_phenotype.loc[(df_phenotype['SITE_ID'] == 'TRINITY')];
In [25]:
df_demographics_volumes_selected_bin
Out[25]:
In [15]:
df_phenotype = pd.read_csv('/home1/varunk/data/ABIDE1/RawDataBIDs/composite_phenotypic_file.csv') # , index_col='SUB_ID'
# df_phenotype = df.as_matrix(['SITE_ID']).squeeze()
df = df.sort_values(['SUB_ID'])
df_td_lt18_m_eyesopen_vol_100_150 = df.loc[(df['SEX'] == 1) & (df['AGE_AT_SCAN'] <=18) & (df['DSM_IV_TR'] == 0) & (df['EYE_STATUS_AT_SCAN'] == 1)]
df_td_lt18_m_eyesopen_vol_100_150;
In [16]:
np.unique(df_phenotype)
Out[16]:
In [ ]:
In [ ]:
In [ ]:
np.mean(eyes_open_tr), np.mean(eyes_closed_tr)
In [ ]:
df_td_lt18_m_eyesopen_age
In [ ]:
df_td_lt18_m_eyesopen_sub_id
In [ ]:
tr[637]
In [ ]:
'50003' in X[1]
In [ ]:
In [ ]:
In [ ]: