Region parcellation from spatial connectivity


In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from scipy.io import loadmat

Load data


In [2]:
# W_fn = os.path.join(os.getenv('HOME'),'work/allen/data/connectivities/allvis_sdk_free_noshell/W_ipsi_all_1.0000e+05.h5')

W_fn = os.path.join(os.getenv('HOME'),'work/allen/data/connectivities/extra_vis_friday_harbor/W_ipsi_1e6.h5')
mat_file = os.path.join(os.getenv('HOME'),'work/allen/data/connectivities/extra_vis_friday_harbor/extra_vis_friday_harbor.mat')

f = h5py.File(W_fn,'r')
W = np.array(f.get('dataset'))
locals().update(loadmat(mat_file))

In [3]:
whos


Variable                          Type          Data/Info
---------------------------------------------------------
Lx                                csc_matrix      (0, 0)	-5.0\n  (1, 0)	1<...>	1.0\n  (7496, 7496)	-6.0
Ly_contra                         csc_matrix      (0, 0)	-3.0\n  (2, 0)	1<...>	1.0\n  (7526, 7526)	-5.0
Ly_ipsi                           csc_matrix      (0, 0)	-5.0\n  (1, 0)	1<...>	1.0\n  (7496, 7496)	-6.0
Omega                             csc_matrix      (8, 77)	1.0\n  (8, 84)	<...>95)	1.0\n  (24, 7496)	1.0
W                                 ndarray       7497x7497: 56205009 elems, type `float64`, 449640072 bytes (428 Mb)
W_fn                              str           /home/kameron/work/allen/<...>iday_harbor/W_ipsi_1e6.h5
col_label_list_source             ndarray       7497x1: 7497 elems, type `float64`, 59976 bytes
col_label_list_target_contra      ndarray       7527x1: 7527 elems, type `float64`, 60216 bytes
col_label_list_target_ipsi        ndarray       7497x1: 7497 elems, type `float64`, 59976 bytes
experiment_source_matrix          ndarray       25x7497: 187425 elems, type `float64`, 1499400 bytes (1 Mb)
experiment_target_matrix_contra   ndarray       25x7527: 188175 elems, type `float64`, 1505400 bytes (1 Mb)
experiment_target_matrix_ipsi     ndarray       25x7497: 187425 elems, type `float64`, 1499400 bytes (1 Mb)
f                                 File          <HDF5 file "W_ipsi_1e6.h5" (mode r)>
h5py                              module        <module 'h5py' from '/hom<...>kages/h5py/__init__.pyc'>
loadmat                           function      <function loadmat at 0x7fb4022c8f50>
mat_file                          str           /home/kameron/work/allen/<...>tra_vis_friday_harbor.mat
np                                module        <module 'numpy' from '/ho<...>ages/numpy/__init__.pyc'>
os                                module        <module 'os' from '/home/<...>da/lib/python2.7/os.pyc'>
plt                               module        <module 'matplotlib.pyplo<...>s/matplotlib/pyplot.pyc'>
row_label_list                    ndarray       25x1: 25 elems, type `int64`, 200 bytes
source_acro                       ndarray       10x1: 10 elems, type `object`, 80 bytes
source_ids                        ndarray       10x1: 10 elems, type `int64`, 80 bytes
sys                               module        <module 'sys' (built-in)>
target_acro                       ndarray       10x1: 10 elems, type `object`, 80 bytes
target_ids                        ndarray       10x1: 10 elems, type `int64`, 80 bytes
voxel_coords_source               ndarray       7497x3: 22491 elems, type `float64`, 179928 bytes (175 kb)
voxel_coords_target_contra        ndarray       7527x3: 22581 elems, type `float64`, 180648 bytes (176 kb)
voxel_coords_target_ipsi          ndarray       7497x3: 22491 elems, type `float64`, 179928 bytes (175 kb)

In [77]:
int_axis=1
rearrange_2d=lambda(arr): arr
#rearrange_2d=lambda(arr): np.fliplr(arr)
#rearrange_2d=lambda(arr): np.swapaxes(arr,0,1)
# remap labels
new_labels = col_label_list_source.copy()
new_label_map = []
for i,label in enumerate(np.unique(new_labels)):
    new_labels[new_labels == label] = \
      i+1
    new_label_map.append(label)
label_grid=map_to_regular_grid(new_labels,
                               voxel_coords_source).squeeze()
label_grid[label_grid==0]=np.nan
label_grid_2d=mode(label_grid, axis=int_axis)[0].squeeze()
label_grid_2d[label_grid_2d==0]=np.nan
label_unique=np.unique(new_labels)
label_grid_2d=rearrange_2d(label_grid_2d)

In [78]:
def draw_region_labels():
    '''
    Convenience function that draws region labels
    '''
    for i,label in enumerate(np.unique(col_label_list_source)):
        x,y=centroid_of_region_2d(label_grid_2d, i+1)
        # region_name=source_acro[source_ids==label_lookup[newlab]][0][0]
        region_name=source_acro[source_ids==label][0][0]
        # print "%s centroid at (%d, %d)" % (region_name,x,y)
        plt.annotate(region_name, xy=(x-1., y), fontsize=20)

In [4]:
sys.path.append('/home/kameron/work/allen/src/allen-voxel-network')
print sys.path


['', '/home/kameron/.local/lib/python2.7/site-packages/scikits.talkbox-0.2.5-py2.7-linux-x86_64.egg', '/home/kameron/.local/lib/python2.7/site-packages/PyNN-0.7.5-py2.7.egg', '/home/kameron/.local/lib/python2.7/site-packages/brian-1.4.1-py2.7-linux-x86_64.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/progressbar-2.3-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/friday_harbor-0.6-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/nest-1.3.0-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/Brian2-2.0b3-py2.7-linux-x86_64.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/scikits.bootstrap-0.3.2-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/pynrrd-0.2.1-py2.7.egg', '/home/kameron/local/anaconda/lib/python27.zip', '/home/kameron/local/anaconda/lib/python2.7', '/home/kameron/local/anaconda/lib/python2.7/plat-linux2', '/home/kameron/local/anaconda/lib/python2.7/lib-tk', '/home/kameron/local/anaconda/lib/python2.7/lib-old', '/home/kameron/local/anaconda/lib/python2.7/lib-dynload', '/home/kameron/.local/lib/python2.7/site-packages', '/home/kameron/local/anaconda/lib/python2.7/site-packages/Sphinx-1.3.1-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/setuptools-17.1.1-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/singledispatch-3.4.0.3-py2.7.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages', '/home/kameron/local/anaconda/lib/python2.7/site-packages/PIL', '/home/kameron/local/anaconda/lib/python2.7/site-packages/cryptography-0.9.1-py2.7-linux-x86_64.egg', '/home/kameron/local/anaconda/lib/python2.7/site-packages/wx-3.0-gtk2', '/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/extensions', '/home/kameron/work/allen/src/allen-voxel-network']

In [5]:
print os.path.join(os.getenv('HOME'),'/work/allen/src/allen-voxel-network')


/work/allen/src/allen-voxel-network

Analyze with NMF


In [6]:
from plotting.voxlib import *
from sklearn.decomposition import NMF

In [80]:
n_components=20
nmf_model = NMF(n_components=n_components)
U = nmf_model.fit_transform(W);
V = nmf_model.components_.T;
print U.shape, V.shape


(7497, 100) (7497, 100)

In [22]:
def map_and_integrate(voxel_data, voxel_coords, int_axis=1, fun=np.sum):
    '''
    Puts the data into a regular grid and integrates over int_axis
    '''
    grid_data = map_to_regular_grid(voxel_data.astype(float), voxel_coords).squeeze()
    int_data = fun(grid_data, axis=int_axis)
    return int_data

In [81]:
u_int = map_and_integrate(U.astype(float), voxel_coords_source)
v_int = map_and_integrate(V.astype(float), voxel_coords_source)

In [24]:
print u_int.shape


(33, 36, 20)

In [81]:
%matplotlib inline
plt.rcParams["figure.figsize"] = [20, 20]
for i in range(n_components):
    fig,(ax1,ax2) = plt.subplots(1,2)
    ax1.imshow(u_int[:,:,i])
    ax1.set_title("U component %d" % i)
    ax2.imshow(v_int[:,:,i])
    ax2.set_title("V component %d" % i)


/home/kameron/local/anaconda/lib/python2.7/site-packages/matplotlib/pyplot.py:516: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
Traceback (most recent call last):

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/ipkernel.py", line 181, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2877, in run_cell
    self.events.trigger('post_execute')

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/events.py", line 74, in trigger
    func(*args, **kwargs)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/pylab/backend_inline.py", line 109, in flush_figures
    return show(True)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/pylab/backend_inline.py", line 32, in show
    display(figure_manager.canvas.figure)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/display.py", line 159, in display
    format_dict, md_dict = format(obj, include=include, exclude=exclude)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/formatters.py", line 174, in format
    data = formatter(obj)

  File "<string>", line 2, in __call__

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/formatters.py", line 219, in catch_format_error
    r = method(self, *args, **kwargs)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/formatters.py", line 330, in __call__
    return printer(obj)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/pylabtools.py", line 207, in <lambda>
    png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/IPython/core/pylabtools.py", line 117, in print_figure
    fig.canvas.print_figure(bytes_io, **kw)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/matplotlib/backend_bases.py", line 2180, in print_figure
    **kwargs)

  File "/home/kameron/local/anaconda/lib/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 537, in print_png
    _png.write_png(renderer._renderer, filename_or_obj, self.figure.dpi)

KeyboardInterrupt

In [26]:
def normalize_components(U,V):
    assert U.shape[1] == V.shape[1], "U and V have different numbers of columns"
    s = np.zeros(U.shape[1])
    Un = np.zeros(U.shape)
    Vn = np.zeros(V.shape)
    for c in range(U.shape[1]):
        u_norm = np.linalg.norm(U[:,c])
        v_norm = np.linalg.norm(V[:,c])
        s[c] = u_norm * v_norm
        Un[:,c] = U[:,c] / u_norm
        Vn[:,c] = V[:,c] / v_norm
    return (Un,s,Vn)

In [82]:
Un, Sn, Vn = normalize_components(U,V)

In [83]:
component_order = np.flipud(np.argsort(Sn))
Un = Un[:,component_order]
Vn = Vn[:,component_order]
Sn = Sn[component_order]
np.round(Sn*20)


ERROR! Session/line number was not unique in database. History logging moved to new session 656
Out[83]:
array([ 121.,   80.,   42.,   24.,   22.,   17.,   13.,   11.,    9.,
          9.,    8.,    7.,    6.,    6.,    6.,    5.,    5.,    4.,
          4.,    4.,    4.,    4.,    4.,    3.,    3.,    3.,    3.,
          3.,    3.,    3.,    3.,    2.,    2.,    2.,    2.,    2.,
          2.,    2.,    2.,    2.,    2.,    2.,    2.,    2.,    2.,
          2.,    2.,    2.,    2.,    2.,    2.,    2.,    2.,    2.,
          2.,    2.,    2.,    1.,    1.,    1.,    1.,    1.,    1.,
          1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,
          1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,
          1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,
          1.,    1.,    1.,    1.,    1.,    1.,    1.,    1.,    0.,    0.])

In [34]:
np.linalg.norm(Un[:,0])


Out[34]:
0.99999999999999989

In [53]:
def replicate_columns(data, column_counts):
    assert data.ndim == 2, "data should be 2d array"
    assert data.shape[1] == len(column_counts), "each column of data should have a count in column_counts"
    total_col = sum(column_counts)
    column_cumsums = np.cumsum(column_counts)
    data_repl = np.zeros((data.shape[0], total_col))
    for i in range(len(column_counts)):
        if i == 0:
            data_repl[:, 0:column_cumsums[i]] = np.tile(data[:,i],(column_counts[i],1)).T
        else:
            data_repl[:, column_cumsums[i-1]:column_cumsums[i]] = np.tile(data[:,i],(column_counts[i],1)).T
    return data_repl

In [84]:
col_counts = np.round(Sn*20).astype(int)
print col_counts, col_counts.sum()
U_repl = replicate_columns(Un, col_counts)
V_repl = replicate_columns(Vn, col_counts)
U_repl.shape
np.all(U_repl[:,0]==U_repl[:,1])


[121  80  42  24  22  17  13  11   9   9   8   7   6   6   6   5   5   4
   4   4   4   4   4   3   3   3   3   3   3   3   3   2   2   2   2   2
   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2
   2   2   2   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
   1   1   1   1   1   1   1   1   0   0] 532
Out[84]:
True

In [85]:
U_repl.shape


Out[85]:
(7497, 532)

In [86]:
plt.imshow(V_repl)


Out[86]:
<matplotlib.image.AxesImage at 0x7fb3e8070190>

Cluster components with K-means (in experiment space)


In [87]:
from sklearn.cluster import KMeans
clust_est = KMeans(n_clusters=7)
#data = np.hstack((U[:,:],V[:,:]))
data = np.hstack((U_repl[:,:],V_repl[:,:]))
#data = Un
clust_est.fit(data)
cluster_labels=np.array(clust_est.labels_, dtype=float) + 1.0

In [88]:
#plt.plot(cluster_labels)
#plt.plot(Sn.cumsum()/Sn.sum())

In [89]:
print cluster_labels.shape
print col_label_list_source.shape
col_label_list_source.squeeze() == 0


(7497,)
(7497, 1)
Out[89]:
array([False, False, False, ..., False, False, False], dtype=bool)

In [90]:
from scipy.stats import mode

def my_mode(data, axis=None):
    return mode(data, axis=axis)[0].squeeze()

# modified_labels = map_to_regular_grid(col_label_list_source, voxel_coords_source)
# modified_labels[modified_labels == 0] = np.nan
# cluster_image = mode(modified_labels, axis=1)[0].squeeze()

modified_labels = map_to_regular_grid(cluster_labels, voxel_coords_source).astype(float)
label_grid = map_to_regular_grid(col_label_list_source,
                                 voxel_coords_source).squeeze()
mask_labels = [ mode(modified_labels[label_grid == 0])[0].squeeze() ]

for l in mask_labels:
    modified_labels[modified_labels == l] = np.nan
cluster_image = mode(modified_labels, axis=1)[0].squeeze()
for l in mask_labels:
    cluster_image[cluster_image == l] = np.nan

plt.rcParams["figure.figsize"] = [16, 16]
fig, ax = plt.subplots(1,1)
ax.imshow(cluster_image, cmap=plt.get_cmap('Accent'), interpolation='none')
draw_region_labels()
plt.savefig("clustering.png")



In [ ]: