Pnevmatikakis 2014 : Greedy Initialization Algorithm



In [1]:

    
%matplotlib inline
import os, socket
import numpy as np
from matplotlib import pyplot as plt
import mpld3



In [2]:

    
import neuralyzer
from neuralyzer.im import smff



In [3]:

    
plt.rcParams['image.cmap'] = 'gray'
plt.rcParams['image.interpolation'] = 'none'
plt.rcParams['figure.figsize'] = (7,7)



In [4]:

    
if socket.gethostname() == 'kumo':
    #datafile = '/home/michael/datac/140316/data5/data2/Image_0001_0001_channel0.tif'
    datafile = '/home/michael/datac/140316/data5/data2/Image_0001_0001_channel1_testdata.tif'
    #datafile = '/home/michael/datac/140316/data5/data2/Image_0001_0001_channel1_minitestdata.tif'
else:
    datafile = '/Users/michael/coding/RIKEN/data/140316/data5/data2/Image_0001_0001_channel1_testdata.tif'
data = neuralyzer.get_data(datafile)









    



[ 2015-04-21 18:30:25 ] [ log ] [ INFO ] : NEURALYZER LOGGER CREATED
[ 2015-04-21 18:30:25 ] [ log ] [ DEBUG ] : stdoutloglevel: DEBUG
[ 2015-04-21 18:30:25 ] [ data_handler ] [ DEBUG ] : root_path set to /home/michael/lib/neuralyzer/notebooks/dev
[ 2015-04-21 18:30:25 ] [ data_handler ] [ DEBUG ] : loaded data from cache file: /home/michael/datac/140316/data5/data2/Image_0001_0001_channel1_testdata.tif.hdf5

reshape the data



In [5]:

    
dshape = data.shape
data = data.reshape((data.shape[0], data.shape[1]*data.shape[2])).T
data.shape









    Out[5]:





(65536, 1000)

centering the data



In [6]:

    
## MEDIAN CENTERING
mediandata = np.median(data,axis=1)
Y = data - np.tile(mediandata, (data.shape[1], 1)).T



In [7]:

    
## MEAN CENTERING
meandata = np.mean(data,axis=1)
Y = data - np.tile(meandata, (data.shape[1], 1)).T



In [8]:

    
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(16, 6))

meanim = np.mean(data, axis=1).reshape(dshape[1:])
im = ax[0].imshow(meanim)
_ = plt.colorbar(im, ax=ax[0])
ax[0].set_title('mean image')
im = ax[1].imshow(meandata.reshape(dshape[1:]))
_ = plt.colorbar(im, ax=ax[1])
ax[1].set_title('center image')
im = ax[2].imshow(np.mean(Y, axis=1).reshape(dshape[1:]))
ax[2].set_title('mean of centered stack')
_ = plt.colorbar(im, ax=ax[2])



In [ ]:



In [ ]:



In [ ]:



In [9]:

    
yshape = Y.shape
ytype = Y.dtype



In [10]:

    
yshape









    Out[10]:





(65536, 1000)

memmap



In [10]:

    
memmapfile, _ = os.path.splitext(datafile)
memmapfile += '_memmap.dat'



In [11]:

    
# write the memorymap file
dmm = np.memmap(memmapfile, dtype=Y.dtype, shape=Y.shape, mode='w+')
dmm[:] = Y[:]
del dmm, Y



In [13]:

    
dmm = np.memmap(memmapfile, dtype=ytype, shape=yshape, mode='r')



In [ ]:

Gaussian Kernel



In [29]:

    
2**6









    Out[29]:





64



In [32]:

    
# Memory conserdations toy example
datarows = 16384
datacols = 100
mem = datarows*datacols*2**3/1024**2
print 'memory needed: %s MB' % mem









    



memory needed: 12 MB



In [33]:

    
# real data example
datarows = 512**2
datacols = 18000
mem = datarows*datacols*2**3/1024**3
print 'memory needed: %s GB' % mem









    



memory needed: 35 GB



In [25]:

    
# available memory on kumo : 181535412k
181535412/1024**2









    Out[25]:





173

in principle it should be possible to calculate $Y \times D$ on kumo for the entire dataset

$D$



In [11]:

    
data.shape









    Out[11]:





(65536, 1000)



In [12]:

    
D = smff.gaussian_blur_matrix(dshape[1:], 41, 20)
#D = smff.gaussian_blur_matrix_sparse(yshape[0], 41, 20)



In [13]:

    
whos









    



Variable     Type         Data/Info
-----------------------------------
D            ndarray      65536x65536: 4294967296 elems, type `float64`, 34359738368 bytes (32768 Mb)
Y            ndarray      65536x1000: 65536000 elems, type `float64`, 524288000 bytes (500 Mb)
ax           ndarray      3: 3 elems, type `object`, 24 bytes
data         ndarray      65536x1000: 65536000 elems, type `uint16`, 131072000 bytes (125 Mb)
datafile     str          /home/michael/datac/14031<...>001_channel1_testdata.tif
dshape       tuple        n=3
fig          Figure       Figure(1280x480)
im           AxesImage    AxesImage(860.235,129.294;233.412x233.412)
meandata     ndarray      65536: 65536 elems, type `float64`, 524288 bytes (512 kb)
meanim       ndarray      256x256: 65536 elems, type `float64`, 524288 bytes (512 kb)
mediandata   ndarray      65536: 65536 elems, type `float64`, 524288 bytes (512 kb)
mpld3        module       <module 'mpld3' from '/ho<...>ages/mpld3/__init__.pyc'>
neuralyzer   module       <module 'neuralyzer' from<...>neuralyzer/__init__.pyc'>
np           module       <module 'numpy' from '/ho<...>ages/numpy/__init__.pyc'>
os           module       <module 'os' from '/home/<...>nd/lib/python2.7/os.pyc'>
plt          module       <module 'matplotlib.pyplo<...>s/matplotlib/pyplot.pyc'>
smff         module       <module 'neuralyzer.im.sm<...>/neuralyzer/im/smff.pyc'>
socket       module       <module 'socket' from '/u<...>ib/python2.7/socket.pyc'>
yshape       tuple        n=2
ytype        dtype        float64



In [14]:

    
plt.imshow(D[:1000, :1000])









    Out[14]:





<matplotlib.image.AxesImage at 0x5555dd0>



In [15]:

    
plt.imshow(D[:,10*256+128].reshape(256,256))









    Out[15]:





<matplotlib.image.AxesImage at 0x6576150>



In [16]:

    
D.shape









    Out[16]:





(65536, 65536)



In [ ]:

    
%prun rho = np.dot(D.T,Y)



In [ ]:

    
whos



In [17]:

    
rho.shape









    Out[17]:





(16384, 100)



In [18]:

    
wcent = np.argmax(rho.sum(axis=1))
wcent = (int(wcent/128), np.mod(wcent, 128))



In [19]:

    
wcent









    Out[19]:





(107, 60)



In [20]:

    
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
ax[0].imshow(rho.mean(axis=1).reshape(128,128))
ax[1].imshow(Y.mean(axis=1).reshape(128,128))









    Out[20]:





<matplotlib.image.AxesImage at 0x4be3d10>



In [22]:

    
def greedy_neuron_id(Y, K, tau, w):
    med = np.median(Y, axis=1)
    MED = np.array(Y.shape[0]*[med])
    Y = Y - MED
    R = Y.copy()
    D = gaussian_blur_matrix(Y.shape[0], w, tau)
    for i in range(K):
        rho = np.dot(D.T, R)
        v =



In [21]:

    
whos









    



Variable     Type       Data/Info
---------------------------------
D            ndarray    16384x16384: 268435456 elems, type `float64`, 2147483648 bytes (2048 Mb)
Y            ndarray    16384x100: 1638400 elems, type `float64`, 13107200 bytes (12 Mb)
ax           ndarray    3: 3 elems, type `object`, 24 bytes
data         ndarray    16384x100: 1638400 elems, type `uint16`, 3276800 bytes (3 Mb)
datacols     int        18000
datafile     str        /home/michael/datac/14031<...>channel1_minitestdata.tif
datarows     int        262144
dshape       tuple      n=3
fig          Figure     Figure(1200x480)
meanim       ndarray    128x128: 16384 elems, type `float64`, 131072 bytes (128 kb)
mediandata   ndarray    16384: 16384 elems, type `float64`, 131072 bytes (128 kb)
medianim     ndarray    128x128: 16384 elems, type `float64`, 131072 bytes (128 kb)
mem          int        4096
mpld3        module     <module 'mpld3' from '/ho<...>ages/mpld3/__init__.pyc'>
neuralyzer   module     <module 'neuralyzer' from<...>neuralyzer/__init__.pyc'>
np           module     <module 'numpy' from '/ho<...>ages/numpy/__init__.pyc'>
os           module     <module 'os' from '/home/<...>nd/lib/python2.7/os.pyc'>
plt          module     <module 'matplotlib.pyplo<...>s/matplotlib/pyplot.pyc'>
rho          ndarray    16384x100: 1638400 elems, type `float64`, 13107200 bytes (12 Mb)
smff         module     <module 'neuralyzer.im.sm<...>/neuralyzer/im/smff.pyc'>
socket       module     <module 'socket' from '/u<...>ib/python2.7/socket.pyc'>
yshape       tuple      n=2
ytype        dtype      float64