In [2]:
import numpy as np
import pandas as pd
import nibabel as nib
In [3]:
# Path to the 4D stack of patient lesion masks in MNI space
# (presumably one binary mask per volume along the last axis -- see the
# loop over mask_stack_data[..., i] further down).
mask_stack_path = '/home/despoB/lesion/anat_preproc/mni_mask_stack.nii.gz'
In [4]:
mask_stack_img = nib.load(mask_stack_path)
In [5]:
# NOTE(review): get_data() is deprecated in newer nibabel; prefer get_fdata().
mask_stack_data = mask_stack_img.get_data()
In [6]:
mask_stack_data.shape
Out[6]:
In [7]:
# Invert the image affine so we can map MNI (mm) coordinates -> voxel indices.
inverse_affine = np.linalg.inv(mask_stack_img.affine)
Confirm that our coordinate translation works by checking against a known relationship (from FSLView).
MNI coordinate (28, 36, 2) = Voxel coordinate (31, 81, 37)
In [8]:
# Map the known MNI coordinate to voxel space; expect ~(31, 81, 37) per FSLView.
nib.affines.apply_affine(inverse_affine, [28, 36, 2])
Out[8]:
To be double sure, test our transformed coordinates on real data.
The patient 191 has a lesioned voxel at MNI coordinate (-36, 2, -2), but moving more than 2mm (1 voxel) in any direction puts you in empty space.
In [9]:
mask_path_191 = '/home/despoB/lesion/anat_preproc/191/191_mask_mni.nii.gz'
In [10]:
mask_img_191 = nib.load(mask_path_191)
In [11]:
# NOTE(review): get_data() is deprecated in newer nibabel; prefer get_fdata().
mask_data_191 = mask_img_191.get_data()
There should be damage here..
In [12]:
# MNI (-36, 2, -2) -> voxel space; expected lesioned voxel.
nib.affines.apply_affine(inverse_affine, [-36, 2, -2])
Out[12]:
In [13]:
# Voxel from the conversion above: should be 1 (lesioned).
mask_data_191[63, 64, 35]
Out[13]:
But not in any of these places...
In [14]:
# One voxel (-4mm in x) away: should be empty space (0).
nib.affines.apply_affine(inverse_affine, [-40, 2, -2])
Out[14]:
In [15]:
mask_data_191[65, 64, 35]
Out[15]:
In [16]:
# One voxel away in y: should also be 0.
nib.affines.apply_affine(inverse_affine, [-36, -2, -2])
Out[16]:
In [17]:
mask_data_191[63, 62, 35]
Out[17]:
In [18]:
# One voxel away in z: should also be 0.
nib.affines.apply_affine(inverse_affine, [-36, 2, 2])
Out[18]:
In [19]:
mask_data_191[63, 64, 37]
Out[19]:
Looks like our coordinate conversion is working. These were lazy checks, but lazy is better than nothing.
In [20]:
# Small 3x3x3 random array to sanity-check flat-index arithmetic.
test_data = np.random.rand(3, 3, 3)
In [21]:
test_data.shape
Out[21]:
In [22]:
test_data
Out[22]:
Pull an arbitrary item using 3D indexing.
In [23]:
test_data[0,2,1]
Out[23]:
The `ravel_multi_index` function translates a multi-dimensional index into the equivalent 1D index of a raveled array.
In [24]:
# For shape (3,3,3): flat index = 0*9 + 2*3 + 1 = 7.
np.ravel_multi_index([0,2,1], (3,3,3))
Out[24]:
In [25]:
# Same element via the raveled (1D) view -- should match test_data[0,2,1].
test_data.ravel()[7]
Out[25]:
Now let's check this works on our image data.
In [26]:
mask_data_191[63, 64, 35]
Out[26]:
In [27]:
# Flat index for the lesioned voxel checked above.
np.ravel_multi_index([63, 64, 35], mask_data_191.shape)
Out[27]:
In [28]:
# 630756 is the flat index computed in the previous cell.
mask_data_191.ravel()[630756]
Out[28]:
Now that we've got indexing working using NumPy arrays, let's translate things into Pandas so we can have labeled rows.
In [29]:
%reload_ext memory_profiler
In [45]:
mask_stack_data.shape
Out[45]:
In [46]:
%%memit
# Build a DataFrame with one column per patient: column '10'+str(i) holds the
# raveled i-th mask volume (labels run '100'..'1063').
# NOTE(review): the try/except creates the DF on the first iteration (when
# mask_data_df is still undefined); the bare except: is risky because it hides
# any other error too -- prefer "except NameError:".
for i in range(64):
pdata = mask_stack_data[...,i].ravel()
try:
mask_data_df['10'+str(i)] = pdata
except:
mask_data_df = pd.DataFrame({'10'+str(i):pdata})
print('Created DF')
In [47]:
mask_data_df.head()
Out[47]:
Now that we have the DF, let's search it.
In [48]:
# One row = one voxel; columns that equal 1 are the patients lesioned there.
test_results = mask_data_df.loc[630756,:] == 1
list(test_results[test_results == True].index)
Out[48]:
In [49]:
%%timeit
test_results = mask_data_df.loc[630756,:] == 1
list(test_results[test_results == True].index)
In [50]:
%memit
test_results = mask_data_df.loc[630756,:] == 1
list(test_results[test_results == True].index)
Out[50]:
Sure enough, visual inspection confirms that those patients have lesions at that coordinate.
Let's see if it is much slower when we tweak things so that the search outputs a list of patients directly.
In [51]:
%%timeit
# Same search, but transposing on the fly so patients become the row index.
mask_data_df.T[mask_data_df.T[630756] == 1].index
Okay, so it looks like we definitely don't want to just transpose the DF on the fly. What if we do that beforehand.
In [52]:
# Transpose once, up front, so the repeated search doesn't pay for it.
mask_data_t = mask_data_df.T
In [53]:
%%timeit
mask_data_t[mask_data_t[630756] == 1].index
Hmm, okay. It seems like the slowdown comes from indexing over columns instead of rows. Back to our original plan.
In [54]:
store = pd.HDFStore('test_mask_data.h5')
In [55]:
# format='t' (table) is required for the partial reads via select() below.
store.put('df', mask_data_df, format='t')
In [56]:
store.df.head()
Out[56]:
When working with data stored as HD5, we can select only the rows/columns we want to be read into memory.
This makes searching really fast.
In [58]:
# Flat index of the lesioned voxel used in the earlier checks.
flat_index = 630756
In [63]:
%%timeit
# Search by reading only the single voxel row from disk via select().
restore = pd.HDFStore('test_mask_data.h5')
vox_row = restore.select('df', start=flat_index, stop=flat_index+1)
res = vox_row.T.iloc[:,0] == 1
patients = list(res[res == True].index)
restore.close()
In [60]:
%memit
restore = pd.HDFStore('test_mask_data.h5')
vox_row = restore.select('df', start=flat_index, stop=flat_index+1)
res = vox_row.T.iloc[:,0] == 1
patients = list(res[res == True].index)
restore.close()
Much faster than reading the entire DF (though it looks like memory use is the same).
In [ ]:
%%timeit
# Baseline: read the whole DF back from the store, then index the row.
store = pd.HDFStore('test_mask_data.h5')
test_results = store.df.loc[630756,:] == 1
patients = list(test_results[test_results == True].index)
store.close()
In [ ]:
%memit
store = pd.HDFStore('test_mask_data.h5')
test_results = store.df.loc[630756,:] == 1
patients = list(test_results[test_results == True].index)
store.close()
We can now quickly search existing patients, but how about adding mask data for a new patient?
In [ ]:
store = pd.HDFStore('test_mask_data.h5')
In [ ]:
data_df = store.get('df') # We have to get/modify a copy, because you can't add rows to an HD5 table.
In [ ]:
# New column = patient ID, values = raveled 3D mask.
data_df['191'] = mask_data_191.ravel()
In [ ]:
store.put('df', data_df, format='t') # Overwrite the existing DF with the new DF.
In [ ]:
store.close()
Double-check that `store` now contains the new patient data.
In [ ]:
# Context manager guarantees the store is closed after the check.
with pd.HDFStore('test_mask_data.h5') as store:
print(store.df.columns)
It's hacky, slow, and will eat up a lot of memory as the DF grows, but it works for now.
In [ ]:
from os import path

from nibabel import load, affines
from numpy import prod, random, ravel_multi_index
from pandas import DataFrame, HDFStore
In [ ]:
def add_replace_patient_mask(patient_number, mask_path, store_path):
    """
    Add/replace a new/existing patient mask in the mask data-store.

    The patient's 3D mask is flattened (raveled) and stored as the column
    ``patient_number`` of the 'df' table in the HDF5 store. If the store
    (or its 'df' table) does not exist yet, it is created.

    Parameters
    ----------
    patient_number : str
        Column label for the patient in the data-store.
    mask_path : str
        Path to a standard-space NIfTI image.
    store_path : str
        Path to a Pandas HDFStore (e.g. file.h5). Created if missing.
    """
    # Check inputs. The store itself may legitimately not exist yet:
    # HDFStore() creates the file on open. (The original version asserted
    # path.exists(store_path) when it did NOT exist, which always failed.)
    assert isinstance(patient_number, str)
    assert path.exists(mask_path)
    # Load the mask image.
    # NOTE(review): get_data() is deprecated in newer nibabel; switch to
    # get_fdata() once the installed nibabel version is confirmed.
    mask_data = load(mask_path).get_data()
    # Load the mask-data store. If no store exists yet, it will be created.
    store = HDFStore(store_path)
    try:
        if 'df' in store:
            # Get the full DataFrame and add/replace the patient column.
            data_df = store.get('df')
            data_df[patient_number] = mask_data.ravel()
        else:
            # Brand-new store: start the DataFrame from this patient.
            data_df = DataFrame({patient_number: mask_data.ravel()})
        # Put the updated DataFrame back; table format keeps select() usable.
        store.put('df', data_df, format='t')
        # Force all our changes to be written to disk.
        store.flush()
    finally:
        # Unload the data store even if something above failed.
        store.close()
In [ ]:
%%time
# NOTE(review): adds '192' but reuses patient 191's mask file -- fine as a
# timing test, but don't keep this row around as real data.
add_replace_patient_mask('192', '/home/despoB/lesion/anat_preproc/191/191_mask_mni.nii.gz','test_mask_data.h5')
It's slow, but acceptable. Adding new patients won't happen all that often. May have to re-factor when the DF gets bigger.
In the future, we can try refactoring things to utilize `store.select` while storing each mask as a separate node in the HDFStore.
We may need to remove a patient entirely from the data store.
In [ ]:
store = pd.HDFStore('test_mask_data.h5')
In [ ]:
data_df = store.get('df') # We have to get/modify a copy, because you can't add rows to an HD5 table.
In [ ]:
# Drop the patient's column (axis=1) from the copy.
data_df.drop('191', axis=1, inplace=True)
In [ ]:
store.put('df', data_df, format='t') # Overwrite the existing DF with the new DF.
In [ ]:
store.close()
Double-check that patient data for 191 has been removed from `store`.
In [ ]:
# Context manager guarantees the store is closed after the check.
with pd.HDFStore('test_mask_data.h5') as store:
print(store.df.columns)
In [ ]:
def remove_patient(patient_number, store_path):
    """
    Remove a patient and their mask from the mask-data store.

    Parameters
    ----------
    patient_number : str
        Column label of the patient to remove.
    store_path : str
        Path to a Pandas HDFStore (e.g. file.h5)

    Raises
    ------
    KeyError
        If the patient is not present in the data-store.
    """
    # Check inputs
    assert isinstance(patient_number, str)
    assert path.exists(store_path)
    # Load the mask-data store.
    store = HDFStore(store_path)
    try:
        # Get the full DataFrame (can't drop a column from an HDF5 table
        # in place, so we get/modify/re-put a copy).
        data_df = store.get('df')
        # Remove the column for the patient. Reassignment instead of
        # inplace=True keeps the original frame intact if put() fails.
        data_df = data_df.drop(patient_number, axis=1)
        # Put the updated DataFrame back in the data store.
        store.put('df', data_df, format='t')
        # Force all our changes to be written to disk.
        store.flush()
    finally:
        # Unload the data store even if drop/put failed (the original
        # leaked the open store on any exception).
        store.close()
In [ ]:
%%time
# Remove the throwaway '192' row added in the timing test above.
remove_patient('192','test_mask_data.h5')
In [ ]:
def coordinate_search(mni_coordinates, img_shape, inverse_affine, store_path):
    """
    Find the patients with a lesioned voxel at a given MNI coordinate.

    Parameters
    ----------
    mni_coordinates : tuple
        MNI coordinates (x,y,z)
    img_shape : tuple
        Voxel dimensions of the 3D mask images to be searched.
    inverse_affine : array-like
        Mapping from MNI space to voxel-space.
    store_path : str
        Path to a Pandas HDFSstore (e.g. file.h5)

    Returns
    -------
    patients : list
        Patients (column labels) with damage at the search coordinate.

    Notes
    -----
    Currently only able to handle 2mm resolution MNI coordinates.
    """
    # Map MNI -> voxel space, then snap to the nearest voxel. Rounding
    # (instead of the original int() truncation) keeps a float result like
    # 63.9999 from silently landing in voxel 63 instead of 64.
    voxel_coordinates = nib.affines.apply_affine(inverse_affine, list(mni_coordinates))
    voxel_coordinates = [int(round(i)) for i in voxel_coordinates]
    # Translate the 3D voxel index into the flat (raveled) row index.
    voxel_index = np.ravel_multi_index(voxel_coordinates, img_shape)
    restore = pd.HDFStore(store_path)
    try:
        # Read only the single row for this voxel from disk.
        vox_row = restore.select('df', start=voxel_index, stop=voxel_index+1)
        # Columns equal to 1 are the patients lesioned at this voxel.
        res = vox_row.T.iloc[:,0] == 1
        patients = list(res[res].index)
    finally:
        # Close the store even if select() fails (the original leaked it).
        restore.close()
    return patients
In [ ]:
# NOTE(review): this searches 'mask_data.h5', not the 'test_mask_data.h5'
# store built above -- confirm the intended store path.
coordinate_search((-36, 2, -2), mask_data_191.shape, inverse_affine, 'mask_data.h5')
In [ ]:
from nilearn import image
def generate_overlap_iamge(overlap_image_path, mask_files=None):
    """
    Build and save a lesion-overlap image from a set of patient masks.

    TODO(review): rename to generate_overlap_image (typo kept so existing
    callers don't break).

    Parameters
    ----------
    overlap_image_path : str
        Where to save the overlap NIfTI image.
    mask_files : list of str, optional
        Paths to the standard-space mask images to stack. The original
        draft tried to pull these for all patients in the database via an
        unfinished ``Patient.`` lookup; until that exists the caller must
        supply them.

    Returns
    -------
    overlap_img
        Nilearn/nibabel image whose voxels hold the overlap count.
    """
    if mask_files is None:
        # The database lookup is not implemented yet; fail loudly instead
        # of with a SyntaxError like the original incomplete line.
        raise ValueError('mask_files is required until the patient-database '
                         'lookup is implemented')
    # Stack mask images into one 4D image.
    mask_stack_img = image.concat_imgs(mask_files)
    # Sum across the stack: each voxel counts how many masks overlap there.
    overlap_img = image.math_img("np.sum(img1, axis=-1)", img1=mask_stack_img)
    # Save the image.
    overlap_img.to_filename(overlap_image_path)
    # Return it so callers can inspect it (the cells below use .shape and
    # .to_filename on the returned value; the original returned None).
    return overlap_img
In [ ]:
# NOTE(review): as drafted above, generate_overlap_iamge() has an incomplete
# line ("mask_files = Patient.") and no return statement, so sum_test would
# be None and the cells below would fail. 'foo' is also a placeholder path.
sum_test = generate_overlap_iamge('foo')
In [ ]:
sum_test.shape
In [ ]:
sum_test.to_filename('sum_test.nii.gz')
In [ ]:
In [ ]:
def add_patient(pid, mask_data, df):
    """Insert ``mask_data`` as column ``pid`` of ``df``.

    When ``df`` is None a new single-column DataFrame is created;
    otherwise the column is added to (or replaced in) the existing
    frame in place. Returns the resulting DataFrame either way.
    """
    if df is not None:
        df[pid] = mask_data
        return df
    return pd.DataFrame({pid: mask_data})
In [ ]:
import memory_profiler as mp
In [ ]:
# NOTE(review): incomplete expression -- dead scratch cell, should be deleted.
mp.
In [ ]:
# Capture the %memit result object (-o) so mem_usage can be read below.
foo = %memit -o
mask_data_df = add_patient('191', mask_data_191.ravel(), None)
In [ ]:
foo.mem_usage
In [ ]:
mask_data_df.shape