Outline for this project

  • Find the time series data
  • Reduce its dimensionality until it is manageable

In [11]:
import re
import copy
import brainbox as bb

In [ ]:


In [2]:
# Paths
# NOTE(review): absolute, machine-specific path — consider making this a
# configurable DATA_DIR so the notebook runs on other machines.
in_path = '/data1/abide/Full/abide_release_sym_gsc0_lp01'

In [3]:
file_dict = bb.fileOps.grab_files(in_path, '.nii.gz')

In [7]:
# Duplicate dropper
def drop_duplicates(in_dict):
    """
    Return a deep copy of ``in_dict`` with duplicate subjects removed.

    The input dictionary is left untouched. In the returned copy, the
    parallel lists under 'sub_name', 'dir' and 'path' are pruned together
    so that only the first occurrence of each subject ID survives.

    The subject ID is the 5-digit number that follows a 2-digit site
    prefix inside each entry of 'sub_name'.

    Parameters
    ----------
    in_dict : dict
        Must contain 'sub_name', 'dir' and 'path' keys mapping to
        equal-length lists.

    Returns
    -------
    dict
        Deep copy of ``in_dict`` with duplicate entries dropped.
    """
    cp_dict = copy.deepcopy(in_dict)
    subs = cp_dict['sub_name']
    dirs = cp_dict['dir']
    paths = cp_dict['path']
    # Subject ID: 5 digits preceded by the 2-digit site code.
    # int() (not the bare numpy `int64`, which was never imported here)
    # normalises leading zeros so '0012345' and 'X0012345' compare equal.
    sub_ids = [int(re.search(r'(?<=\d{2})\d{5}', name).group())
               for name in subs]
    seen = set()  # set gives O(1) membership instead of list's O(n)
    drop = []
    for idx, sub_id in enumerate(sub_ids):
        if sub_id in seen:
            drop.append(idx)
        else:
            seen.add(sub_id)
    print('Found {} items to drop'.format(len(drop)))
    # Pop in reverse order so earlier indices remain valid
    for idx in reversed(drop):
        subs.pop(idx)
        dirs.pop(idx)
        paths.pop(idx)

    return cp_dict

In [12]:
in_dict = drop_duplicates(file_dict)


Found 49 items to drop

In [14]:
data_dict = bb.fileOps.read_files(in_dict, silence=True)


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-14-7ab3338bf80a> in <module>()
----> 1 data_dict = bb.fileOps.read_files(in_dict, silence=True)

/home/surchs/Code/brainbox/fileOps/base.pyc in read_files(file_dict, network, silence)
    123             # Preallocate array
    124             array_dict[sub_dir] = [np.empty(arr_size), 0]
--> 125         array_dict[sub_dir][0][array_dict[sub_dir][1], ...] = tmp_flat
    126         array_dict[sub_dir][1] += 1
    127 

IndexError: index 0 is out of bounds for axis 0 with size 0

In [ ]: