# Generate Network Stacks for the GLM

The goal of this experiment is to investigate the variance in the preprocessed data. I will look at different voxel-wise metrics, in particular the standard deviation and a binarized count across subjects.

1. Generate 4D stacks of all subjects for a given template network so I can flip through them
2. Generate mean and std maps across all subjects for all networks
3. Make a 4D stack of the template networks for comparison (see the sketch just below this list)
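
Step 3 is not implemented in the cells below; here is a minimal sketch of how it could look. The template paths are hypothetical, and it assumes the template networks come as separate 3D files that need to be stacked into one 4D image:

``````
# Minimal sketch for step 3 (hypothetical paths): stack separate 3D template
# network files into one 4D image so they can be flipped through.
import glob
import numpy as np
import nibabel as nib

template_files = sorted(glob.glob('/data1/templates/network_*.nii.gz'))  # hypothetical
imgs = [nib.load(f) for f in template_files]
stack = np.stack([i.get_data() for i in imgs], axis=3)
out = nib.Nifti1Image(stack, affine=imgs[0].get_affine(), header=imgs[0].get_header())
nib.save(out, '/data1/templates/template_netstack.nii.gz')  # hypothetical
``````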
``````

In [1]:

import os
import re
import copy
import numpy as np
import pandas as pd
import nibabel as nib
import brainbox as bb
import multiprocessing as mp
from matplotlib import pyplot as plt

``````
``````

In [2]:

import os
import re
import glob
import copy
import numpy as np
import nibabel as nib
from brainbox import tools as to
# Python 2 builtin any, aliased so it is not confused with np.any
from __builtin__ import any as b_any

``````
``````

In [3]:

def find_files(in_path, ext, targets, template='(?<=\d{2})\d{5}', sub=False):
    """
    Find files with extension ext and return them in the order of the
    targets list given as argument. Returns a dictionary with the keys
    'sub_name', 'dir' and 'path', and drops duplicate subject IDs.
    """
    # Go through each directory and see if I can find the subjects I am
    # looking for
    ext = '*{}'.format(ext)
    out_dict = {key: [] for key in ['sub_name', 'dir', 'path']}

    if not sub:
        sub_dirs = next(os.walk(in_path))[1]
        for sub_dir in sub_dirs:
            tmp_dir = os.path.join(in_path, sub_dir)
            in_files = glob.glob(os.path.join(tmp_dir, ext))
            tmp_dict = dict()

            # Keep only the targets that have a matching file
            matches = [x for x in targets if b_any(str(x) in t for t in in_files)]

            for in_file in in_files:
                sub_name = os.path.basename(in_file).split('.')[0]
                sub_id = re.search(r'{}'.format(template), sub_name).group()
                if sub_id in tmp_dict:
                    # This is a duplicate
                    continue
                tmp_dict[sub_id] = (sub_name, in_file)

            # Re-sort the path info into target order
            for target in matches:
                sub_name, in_file = tmp_dict[target]
                out_dict['sub_name'].append(sub_name)
                out_dict['dir'].append(sub_dir)
                out_dict['path'].append(in_file)
    else:
        sub_dir = sub
        tmp_dir = os.path.join(in_path, sub_dir)
        in_files = glob.glob(os.path.join(tmp_dir, ext))
        tmp_dict = dict()

        # Keep only the targets that have a matching file
        matches = [x for x in targets if b_any(str(x) in t for t in in_files)]

        for in_file in in_files:
            sub_name = os.path.basename(in_file).split('.')[0]
            sub_id = re.search(r'{}'.format(template), sub_name).group()
            if sub_id in tmp_dict:
                # This is a duplicate
                continue
            tmp_dict[sub_id] = (sub_name, in_file)

        # Re-sort the path info into target order
        for target in matches:
            sub_name, in_file = tmp_dict[target]
            out_dict['sub_name'].append(sub_name)
            out_dict['dir'].append(sub_dir)
            out_dict['path'].append(in_file)
    return out_dict

``````
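
In [8] below relies on a `pheno` table together with the `find_files` helper. A minimal sketch of what that setup could look like; the CSV path is hypothetical, and `pheno` is assumed to be a DataFrame with a `subjects` column:

``````
# Hypothetical example: load the phenotype table and call find_files on it.
import pandas as pd

pheno = pd.read_csv('/data1/pierre_scores/pheno/pheno.csv')  # hypothetical path
example = find_files('/data1/pierre_scores/rest_1', '.nii.gz',
                     pheno['subjects'].values,
                     template='(?<=fmri_)s\d{6}\S{3}',
                     sub='dual_regression_sc12')
print(example['sub_name'][:3])
``````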
``````

In [8]:

# Paths
import errno

scale = 12
run = 1
method = 'dual_regression'
directory = '{}_sc{}'.format(method, scale)
in_path = '/data1/pierre_scores/rest_{}'.format(run)
out_path = '/data1/pierre_scores/out/scores_s{}/stack_maps_{}'.format(scale, run)
if not os.path.isdir(out_path):
    try:
        os.makedirs(out_path)
    except OSError as exc:  # guard against a race with another process
        if exc.errno == errno.EEXIST and os.path.isdir(out_path):
            pass
        else:
            raise

ext = '.nii.gz'

# pheno is the phenotype DataFrame with a 'subjects' column (see above)
targets = pheno['subjects'].values
file_dict = find_files(in_path, ext, targets, template='(?<=fmri_)s\d{6}\S{3}', sub=directory)
num_subs = len(file_dict['path'])
data_subs = np.array([re.search(r'(?<=fmri_)s\d{6}\S{3}', sub_id).group() for sub_id in file_dict['sub_name']])

``````
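
A quick, optional sanity check that the matched files line up with the targets (illustrative only, using the variables defined in the cell above):

``````
# Illustrative check: how many targets were matched, plus a peek at the first few
print('Matched {} of {} targets'.format(num_subs, len(targets)))
print(data_subs[:3])
print(file_dict['path'][:3])
``````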
``````

In [9]:

def run_par(args):
    """
    Build the 4D across-subject stack for one network, save it, and
    return the mean and std maps for that network. Each worker only
    fills its own net_id slice of the scale-sized output arrays.
    Relies on scale, method and run from the notebook's global scope.
    """
    ds, num_subs, use_dict, tmp_i, out_path, net_id = args

    mean_mat = np.zeros(ds[:3] + (scale,))
    std_mat = np.zeros(ds[:3] + (scale,))

    sub_stack = np.zeros(ds[:3] + (num_subs,))
    for sub_id in np.arange(num_subs):
        img = nib.load(use_dict['path'][sub_id])
        data = img.get_data()
        sub_stack[..., sub_id] = data[..., net_id]

    # Save the network stack first
    stack_out = nib.Nifti1Image(sub_stack, affine=tmp_i.get_affine(), header=tmp_i.get_header())
    nib.save(stack_out, os.path.join(out_path, '{}_netstack_net{}_scale_{}_run_{}.nii.gz'.format(method, net_id + 1, scale, run)))

    # Get the mean and std across subjects
    mean_mat[..., net_id] = np.mean(sub_stack, axis=3)
    std_mat[..., net_id] = np.std(sub_stack, axis=3)

    return mean_mat, std_mat

``````
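
Each call to `run_par` returns full scale-sized arrays in which only its own `net_id` slice is non-zero, which is why In [11] below can combine the results with a plain element-wise sum. A toy illustration of that fill-one-slice-and-sum pattern:

``````
import numpy as np

# Two workers each fill one slice of a (2, 2, 2, 2) array; summing the
# results reassembles the full map because the untouched slices are zero.
parts = []
for net_id in range(2):
    part = np.zeros((2, 2, 2, 2))
    part[..., net_id] = net_id + 1
    parts.append(part)
total = sum(parts)
print('{} {}'.format(total[..., 0].max(), total[..., 1].max()))  # 1.0 2.0
``````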
``````

In [10]:

# Get a template image for the output geometry
tmp_i = nib.load(file_dict['path'][0])
tmp = tmp_i.get_data()
ds = tmp.shape

# Set up the parallel processing
p_perc = 0.9
p_count = int(np.floor(mp.cpu_count() * p_perc))

# Prepare the meta mats
mean_mat = np.zeros(ds[:3] + (scale,))
std_mat = np.zeros(ds[:3] + (scale,))
arg_list = list()
for net_id in np.arange(scale):
    arg_list.append((ds, num_subs, file_dict, tmp_i, out_path, net_id))

# Run the stuff in parallel
print('Running things in parallel now - for speed!')
p = mp.Pool(p_count)
results = p.map(run_par, arg_list)
print('Done with that')

``````
``````

Running things in parallel now - for speed!
Done with that

``````
``````

In [11]:

for result in results:
    mean_mat += result[0]
    std_mat += result[1]

# Wrap the summed maps in images with the template geometry
mean_out = nib.Nifti1Image(mean_mat, affine=tmp_i.get_affine(), header=tmp_i.get_header())
std_out = nib.Nifti1Image(std_mat, affine=tmp_i.get_affine(), header=tmp_i.get_header())

# Save the mean and std maps
print('Saving the mean and std files to {}'.format(out_path))
nib.save(mean_out, os.path.join(out_path, '{}_mean_stack_scale{}_run_{}.nii.gz'.format(method, scale, run)))
nib.save(std_out, os.path.join(out_path, '{}_std_stack_scale{}_run_{}.nii.gz'.format(method, scale, run)))

``````
``````

Saving the mean and std files to /data1/pierre_scores/out/scores_s12/stack_maps_1

``````
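
The binarized count mentioned at the top is not computed in the cells above. A minimal sketch of one way to get it from a saved network stack; the threshold value is an assumption and would need to be tuned:

``````
# Sketch: voxel-wise count of subjects whose map exceeds a threshold.
thr = 0.1  # hypothetical threshold
net_id = 0
stack_path = os.path.join(out_path, '{}_netstack_net{}_scale_{}_run_{}.nii.gz'.format(method, net_id + 1, scale, run))
stack_i = nib.load(stack_path)
bin_count = np.sum(stack_i.get_data() > thr, axis=3).astype(np.float32)
count_out = nib.Nifti1Image(bin_count, affine=stack_i.get_affine(), header=stack_i.get_header())
nib.save(count_out, os.path.join(out_path, '{}_bincount_net{}_scale_{}_run_{}.nii.gz'.format(method, net_id + 1, scale, run)))
``````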