In [13]:
import os
import re
import sys
import glob
import time
import numpy as np
import nibabel as nib
from scipy import io as si

In [2]:
# --- Locations -----------------------------------------------------------
# NOTE(review): these are absolute, machine-specific paths.
mask_dir = '/data1/cambridge/template'
data_dir = '/data1/abide/Full/abide_release_sym_gsc0_lp01'
out_dir = '/data1/john'

# --- File name patterns --------------------------------------------------
# `{:03}` is filled in with a scale, zero-padded to three digits.
mask_temp = 'template_cambridge_basc_multiscale_sym_scale{:03}.nii.gz'
# Glob pattern; the `*` stands in for the subject-specific part of the name.
data_temp = 'fmri_*_session_1_run1.nii.gz'
out_temp = 'connectomes_abide.mat'

# Scales substituted into `mask_temp`, one template file per entry.
scales = [
    7, 12, 20,
    36, 64, 122,
    197, 325, 444,
]

# Create the output directory (including missing parents) on first use.
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

In [3]:
# Collect every functional file matching `data_temp` one directory level
# below `data_dir` and index it by subject ID.
#
# `subs` maps the run of digits that follows 'fmri_' in the file name to
# the full path of that file. Each subject must occur exactly once; a
# second file for the same subject aborts the cell.
#
# The glob result is sorted so the files are always visited in the same
# order, independent of the order the file system returns them in.
files = sorted(glob.glob(os.path.join(data_dir, '*', data_temp)))
subs = dict()
for f in files:
    fname = os.path.basename(f)
    match = re.search(r'(?<=fmri_)[0-9]+', fname)
    if match is None:
        # Without this guard a non-numeric name fails with an opaque
        # "'NoneType' object has no attribute 'group'".
        raise Exception('Could not find a subject ID in {}'.format(f))
    sub = match.group()
    if sub in subs:
        # The original called the undefined name `message(...)` here, which
        # raised NameError instead of reporting the duplicate.
        message = 'There are 2 of {}, the second one was {}'.format(sub, f)
        raise Exception(message)
    subs[sub] = f

In [4]:
# Load the template image of every scale once, up front.
#
# `scale_dict` maps 'scale_<n>' to a tuple `(mask, rois)`:
#   mask -- the template's voxel array as stored on disk
#   rois -- the sorted unique non-zero values found in `mask`
num_subs = len(subs)
scale_dict = dict()
for scale in scales:
    scale_name = 'scale_{}'.format(scale)
    mask_name = mask_temp.format(scale)
    mask_path = os.path.join(mask_dir, mask_name)
    m_img = nib.load(mask_path)
    # `get_data()` is deprecated and was removed in nibabel 5.0;
    # `np.asanyarray(img.dataobj)` is the documented drop-in replacement
    # that keeps the same values and dtype behaviour.
    mask = np.asanyarray(m_img.dataobj)
    scale_dict[scale_name] = (mask, np.unique(mask[mask != 0]))

In [5]:
# Compute one ROI-by-ROI correlation matrix per subject and per scale.
#
# `all_dict` maps 'scale_<n>' to an array of shape
# (num_rois, num_rois, num_subs); slice [..., i] holds the matrix of the
# i-th subject in the iteration order of `subs`.
#
# A single-line progress report (time taken, percent done, estimated time
# remaining) is rewritten on stdout after every subject.
all_dict = dict()
num_subs = len(subs)
avg_time = []  # seconds spent on each subject so far
for s_id, sub in enumerate(subs.keys()):
    p_c = float(s_id + 1) / num_subs * 100
    rem = num_subs - (s_id + 1)

    start = time.time()
    s_img = nib.load(subs[sub])
    # `get_data()` is deprecated and was removed in nibabel 5.0;
    # `np.asanyarray(img.dataobj)` is the documented equivalent.
    data = np.asanyarray(s_img.dataobj)
    # Axis 3 is used as the time axis.
    t_points = data.shape[3]

    for scale in scales:
        scale_name = 'scale_{}'.format(scale)
        mask, rois = scale_dict[scale_name]
        num_rois = len(rois)

        # One row per ROI: the mean over all voxels carrying that label.
        # Assumes `mask` has the same shape as the leading axes of `data`
        # -- TODO confirm.
        data_rois = np.zeros((num_rois, t_points))
        for ind, roi in enumerate(rois):
            data_rois[ind, :] = np.mean(data[mask == roi, :], axis=0)
        mat = np.corrcoef(data_rois)

        # Allocate the per-scale stack the first time the scale is seen.
        if scale_name not in all_dict:
            all_dict[scale_name] = np.zeros((num_rois, num_rois, num_subs))
        all_dict[scale_name][..., s_id] = mat
    stop = time.time()
    took = stop - start
    avg_time.append(took)
    avg = np.average(avg_time)
    # Remaining time estimate: subjects left times mean time per subject.
    rem_t = rem * avg
    sys.stdout.write('\r #{} took {:.3f}, {:.1f}% done, {:.2f} more seconds to go'.format(s_id+1, took, p_c, rem_t))
    sys.stdout.flush()


 #901, 100.0% done, 0.00 more seconds to go

In [7]:
from scipy import io as si

In [12]:
# Write every per-scale connectome stack into one MATLAB file; each key of
# `all_dict` becomes a variable in that file.
# NOTE(review): the file name is hard-coded and relative to the working
# directory; `out_dir` / `out_temp` are not used here.
si.savemat(file_name='john_file.mat', mdict=all_dict)

In [14]:
# Comma-separated subject IDs, in the iteration order of `subs`.
# Built with `join` so the string has no leading comma: the previous
# incremental formatting produced ',id1,id2,...', i.e. an empty first
# field that shifts every ID by one position when the string is split.
out_str = ','.join(subs.keys())

In [15]:
# Save the subject list as plain text.
# Text mode is required because `out_str` is a text string (binary mode
# raises TypeError on Python 3), and the `with` block closes the file even
# if the write fails.
with open('subjects.txt', 'w') as f:
    f.write(out_str)

In [ ]: