Notebook of Monday, 18th of August 2014

The first thing I'll do is to pick an ICC map (more a correlation map) and from that pick a voxel with a high value. I will then calculate the correlation between the time point values for this voxel manually and compare them with the score in the ICC map in order to check if the values make sense. The values have been generate by this script.

Setup


In [1]:
# Get the imports
import os
import re
import glob
import numpy as np
import nibabel as nib
from matplotlib import pyplot as plt
from IPython.core.display import Image

In [2]:
# Set the paths for the inputs
project_path = '/data1/scores'
data_path = os.path.join(project_path, 'all_out')
result_path = os.path.join(project_path, 'test_output')
figure_path = os.path.join(result_path, 'figures')
volume_path = os.path.join(result_path, 'volumes')

Beginning of Analysis

First, I'll get the volume ICC maps and show a slice. Here, I want to see the seed based metric because the has high values and I want to compare it to the sliding window with 50 time points. To keep things simple, I'll use scale 10 and the network number 8 which is the DMN.


In [3]:
# Session 1 vs 2
seed_icc_path_12 = os.path.join(volume_path, 'icc_map_12_net_8_sc_10_seed.nii.gz')
slide_icc_path_12 = os.path.join(volume_path, 'icc_map_12_net_8_sc_10_sld50.nii.gz')
# Session 1 vs 3
seed_icc_path_13 = os.path.join(volume_path, 'icc_map_13_net_8_sc_10_seed.nii.gz')
slide_icc_path_13 = os.path.join(volume_path, 'icc_map_13_net_8_sc_10_sld50.nii.gz')
# Session 2 vs 3
seed_icc_path_23 = os.path.join(volume_path, 'icc_map_23_net_8_sc_10_seed.nii.gz')
slide_icc_path_23 = os.path.join(volume_path, 'icc_map_23_net_8_sc_10_sld50.nii.gz')

In [4]:
# Read the files
seed_12 = nib.load(seed_icc_path_12).get_data()
slide_12 = nib.load(slide_icc_path_12).get_data()

seed_13 = nib.load(seed_icc_path_13).get_data()
slide_13 = nib.load(slide_icc_path_13).get_data()

seed_23 = nib.load(seed_icc_path_23).get_data()
slide_23 = nib.load(slide_icc_path_23).get_data()

In [5]:
# Display the slice
slice = 40
fig = plt.figure()
# Session 1 
ax_se12 = fig.add_subplot(231)
ax_se12.imshow(np.rot90(seed_12[:,slice,:]), interpolation='nearest')
ax_se12.set_axis_off()
ax_se12.set_title('seed 1v2')

ax_sl12 = fig.add_subplot(234)
ax_sl12.imshow(np.rot90(slide_12[:,slice,:]), interpolation='nearest')
ax_sl12.set_axis_off()
ax_sl12.set_title('slide 1v2')
# Session 2
ax_se13 = fig.add_subplot(232)
ax_se13.imshow(np.rot90(seed_13[:,slice,:]), interpolation='nearest')
ax_se13.set_axis_off()
ax_se13.set_title('seed 1v3')

ax_sl13 = fig.add_subplot(235)
ax_sl13.imshow(np.rot90(slide_13[:,slice,:]), interpolation='nearest')
ax_sl13.set_axis_off()
ax_sl13.set_title('slide 1v3')
# Session 3
ax_se23 = fig.add_subplot(233)
ax_se23.imshow(np.rot90(seed_23[:,slice,:]), interpolation='nearest')
ax_se23.set_axis_off()
ax_se23.set_title('seed 2v3')

ax_sl23 = fig.add_subplot(236)
ax_sl23.imshow(np.rot90(slide_23[:,slice,:]), interpolation='nearest')
ax_sl23.set_axis_off()
ax_sl23.set_title('slide 2v3')


Out[5]:
<matplotlib.text.Text at 0x357a0d0>

Now I want to grab the maximum value from each of the maps and compare it to the values in the individual subjects. Thus I can compute the values manually again here.


In [6]:
# Find the subjects

# First, get the sliding window maps
sld_files = glob.glob(os.path.join(data_path, 'sld_clust_10_win_50', 'stability_maps', '*.nii.gz'))
sld_subs = {}
for in_file in sld_files:
    # find subject name
    sub_name = re.search(r'sub\d*', in_file.split('/')[-1]).group()
    ses_name = int(re.search(r'(?<=session)\d', in_file.split('/')[-1]).group())
    # Store the file in the dictionary
    if not sub_name in sld_subs.keys():
        sld_subs[sub_name] = range(3)
    sld_subs[sub_name][ses_name-1] = in_file
    
# Second, the seed files
seed_files = glob.glob(os.path.join(data_path, 'cbb_10', 'rmap_part', '*.nii.gz'))
seed_subs = {}
for in_file in seed_files:
    # find subject name
    sub_name = re.search(r'sub\d*', in_file.split('/')[-1]).group()
    ses_name = int(re.search(r'(?<=session)\d', in_file.split('/')[-1]).group())
    # Store the file in the dictionary
    if not sub_name in seed_subs.keys():
        seed_subs[sub_name] = range(3)
    seed_subs[sub_name][ses_name-1] = in_file

In [7]:
# Find the top 100 voxels in each modality - vectorize the arrays first
top = 100

seed_vec_12 = seed_12.flatten()
seed_vec_13 = seed_13.flatten()
seed_vec_23 = seed_23.flatten()
seed_v = (seed_vec_12, seed_vec_13, seed_vec_23)

slide_vec_12 = slide_12.flatten()
slide_vec_13 = slide_13.flatten()
slide_vec_23 = slide_23.flatten()
sld_v = (slide_vec_12, slide_vec_13, slide_vec_23)

# Turn them into tuples so we can save variables
seed_top = (seed_vec_12.argsort()[-top:][::-1], seed_vec_13.argsort()[-top:][::-1], seed_vec_23.argsort()[-top:][::-1])
sld_top = (slide_vec_12.argsort()[-top:][::-1], slide_vec_13.argsort()[-top:][::-1], slide_vec_23.argsort()[-top:][::-1])

Now pick an element and build a vector for each session containing all subjects. Then we can use this vector to compute the correlation coefficient for the voxel


In [8]:
pick_top = 80
network = 8
# Do it for the seeds first
num_subs = len(seed_subs.keys())
seed_arr = np.zeros((num_subs, 6))
for sub_id, sub in enumerate(seed_subs.keys()):
    sub_files = seed_subs[sub]
    # Get the files
    vec1 = nib.load(sub_files[0]).get_data()[:,:,:,network].flatten()
    vec2 = nib.load(sub_files[1]).get_data()[:,:,:,network].flatten()
    vec3 = nib.load(sub_files[2]).get_data()[:,:,:,network].flatten()
    # Pull the value and store it in the array
    seed_arr[sub_id, 0] = vec1[seed_top[0][pick_top]]
    seed_arr[sub_id, 1] = vec2[seed_top[0][pick_top]]
    seed_arr[sub_id, 2] = vec1[seed_top[1][pick_top]]
    seed_arr[sub_id, 3] = vec3[seed_top[1][pick_top]]
    seed_arr[sub_id, 4] = vec2[seed_top[2][pick_top]]
    seed_arr[sub_id, 5] = vec3[seed_top[2][pick_top]]

# Now do it for the slide
num_subs = len(sld_subs.keys())
sld_arr = np.zeros((num_subs, 6))
for sub_id, sub in enumerate(sld_subs.keys()):
    sub_files = sld_subs[sub]
    # Get the files
    vec1 = nib.load(sub_files[0]).get_data()[:,:,:,network].flatten()
    vec2 = nib.load(sub_files[1]).get_data()[:,:,:,network].flatten()
    vec3 = nib.load(sub_files[2]).get_data()[:,:,:,network].flatten()
    # Pull the value and store it in the array
    sld_arr[sub_id, 0] = vec1[sld_top[0][pick_top]]
    sld_arr[sub_id, 1] = vec2[sld_top[0][pick_top]]
    sld_arr[sub_id, 2] = vec1[sld_top[1][pick_top]]
    sld_arr[sub_id, 3] = vec3[sld_top[1][pick_top]]
    sld_arr[sub_id, 4] = vec2[sld_top[2][pick_top]]
    sld_arr[sub_id, 5] = vec3[sld_top[2][pick_top]]

f = plt.figure()
for id, count in enumerate(range(0,6,2)):
    ax = f.add_subplot(2,3,id+1)
    ax.scatter(seed_arr[:,count], seed_arr[:,count+1])
    ax.set_title('seed {}'.format(id))
    
    bx = f.add_subplot(2,3,id+4)
    bx.scatter(sld_arr[:,count], sld_arr[:,count+1])
    bx.set_title('slide {}'.format(id))
f.tight_layout()

In [15]:
# Plot the values from the two vectors against each other and display them
f = plt.figure()
for id, count in enumerate(range(0,6,2)):
    pos = id + 1
    tmp = fig.add_subplot(2, 3, pos)
    tmp.scatter(seed_arr[:,count], seed_arr[:,count+1])
    tmp.set_title('seed {0}'.format(pos))
    
    tmp2 = fig.add_subplot(2, 3, pos + 3)
    tmp2.scatter(sld_arr[:,count], sld_arr[:,count+1])
    tmp2.set_title('slide {0}'.format(pos))


<matplotlib.figure.Figure at 0x50cc910>

In [39]:
f = plt.figure()
for id, count in enumerate(range(0,6,2)):
    ax = f.add_subplot(2,3,id+1)
    ax.scatter(seed_arr[:,count], seed_arr[:,count+1])
    ax.set_title('seed {}'.format(id))
    
    bx = f.add_subplot(2,3,id+4)
    bx.scatter(sld_arr[:,count], sld_arr[:,count+1])
    bx.set_title('slide {}'.format(id))
f.tight_layout()



In [41]:
# Now compute correlation coefficients and compare them to what we have in the map
seed_str = ''
sld_str = ''
for id, count in enumerate(range(0,6,2)):
    se = np.correlate(seed_arr[:,count], seed_arr[:,count+1])
    se_comp = seed_v[id][seed_top[id][pick_top]]
    sl = np.correlate(sld_arr[:,count], sld_arr[:,count+1])
    sl_comp = sld_v[id][sld_top[id][pick_top]]
    seed_str = '{0}seed {1} is: {2} and in map is: {3}\n'.format(seed_str, count/2.0, se, se_comp)
    sld_str = '{0}slide {1} is: {2} and in map is: {3}\n'.format(sld_str, count/2.0, sl, sl_comp)
print(seed_str)
print(sld_str)


seed 0.0 is: [ 1.03290312] and in map is: 0.854629642416
seed 1.0 is: [ 2.19106532] and in map is: 0.823262180819
seed 2.0 is: [ 1.84356221] and in map is: 0.848454177157

slide 0.0 is: [ 0.] and in map is: 0.963027622687
slide 1.0 is: [ 0.0040524] and in map is: 0.956746963641
slide 2.0 is: [ 0.35811916] and in map is: 0.94308828821


In [207]:



Out[207]:
'slide 0.0 is: -0.124740837643 and in map is: 0.963027622687\nslide 1.0 is: -0.00113937486188 and in map is: 0.956746963641\nslide 2.0 is: 0.631843299965 and in map is: 0.94308828821\n'

In [206]:



Out[206]:
(53, 64, 52)

In [167]:


In [ ]: