In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import urllib2
# NOTE(review): __future__ imports normally must precede all other code;
# IPython compiles cell statements individually, which is why this works here.
from __future__ import division
plt.style.use('ggplot')
np.random.seed(1)
# synapse-density CSV: columns are (x, y, z, unmasked, synapses) -- inferred
# from how the columns are used below; confirm against the repo README
url = ('https://raw.githubusercontent.com/Upward-Spiral-Science'
'/data/master/syn-density/output.csv')
data = urllib2.urlopen(url)
csv = np.genfromtxt(data, delimiter=",",dtype='int')[1:] # don't want first row (labels)
# NOTE(review): `data` is reused -- first the HTTP response object, now the array
data = csv
print data[::1000]
# number of unique bin coordinates along x, y, z (columns 0-2)
sizes = [len(np.unique(data[:, i])) for i in range(3)]
# (max, min) pairs and max-min spans per spatial axis
ranges = [(np.max(data[:, i]), np.min(data[:,i])) for i in range(3)]
ranges_diff = [np.max(data[:, i])-np.min(data[:,i]) for i in range(3)]
print np.max(data[:,3])
print sizes
print ranges
print ranges_diff
# list the distinct bin coordinates along each axis
for i, ax in zip(range(3), ['x', 'y', 'z']):
print ax + '-axis: '
print 'unique bins (in data): ', np.unique(data[:, i]).size
print np.unique(data[:, i])
print
From the data given, it seems that in the x-direction we have 108 bins, and in the y-direction, 86. Further support for this claim: 108/86 ~= 450/350, where 450x350 are the stated dimensions (µm) in Bock 2011.
In [2]:
# full image dimensions in pixels (taken from the viz.neurodata.io viewer)
xPix = 135424
yPix = 119808
# pixels per bin, given 108 bins along x and 86 along y
xPixPerBin = xPix/108.0
yPixPerBin = yPix/86.0
print xPixPerBin, yPixPerBin
# Now since each bin is 40 data coordinates we can define the following function to convert from coordinates to pixels.
# NOTE(review): the function below divides by 39, not 40 -- the unique
# coordinates in the data are spaced 39 apart; confirm which is intended.
In [3]:
def coords_to_px(xcoord, ycoord):
    """Convert (x, y) data coordinates into image-pixel coordinates.

    Bins are 39 data units wide; each bin spans (xPixPerBin, yPixPerBin) pixels.
    Returns a (x_pixels, y_pixels) tuple.
    """
    bins = np.array([xcoord, ycoord], dtype='float') / 39.0
    return (bins[0] * xPixPerBin, bins[1] * yPixPerBin)
# check that max coordinate values are close to max pixels
# (4192 and 3358 are the largest x and y coordinates observed in the data)
print coords_to_px(4192, 3358)
# how big is a bin? (just a sanity check, should obviously
# be identical to xPixPerBin and yPixPerBin)
print coords_to_px(39.0, 39.0)
Looking through the JavaScript code on viz.neurodata.io, we see that it's tiling with 512x512 .png images. At resolution 0, each image has 512x512 "pixels" (corresponding to the brain images) in it. This can be seen b/c the total number of tiles in the x-direction is 264, and 264x512=135168, approximately our max pixels; for the y-direction, max tiles is 233, and 233x512=119296, approximately the max pixels. Similarly for resolution 1, each 512x512 png now holds 1024x1024 "pixels", and so on as resolution goes up. Also note that at resolution 1, a single tile is very close in size to a single bin (looking at the second line of output of the previous code block, we see that bins are slightly larger).
In [4]:
def get_tilenums_at_res(xcoord, ycoord, res):
    """Return (x, y) tile indices for the given data coordinates and resolution.

    A tile spans 512 * 2**res pixels per side; indices are floored floats.
    """
    px, py = coords_to_px(xcoord, ycoord)
    tile_span = 512 * (2 ** res)
    tile_x = np.floor(float(px) / tile_span)
    tile_y = np.floor(float(py) / tile_span)
    return tile_x, tile_y
def get_image_url1(xcoord, ycoord, res, z):
    """Build the catmaid tile URL for given data coords, resolution, and z-slice.

    The tile filename layout is <y>_<x>_<res>.png.
    """
    tx, ty = get_tilenums_at_res(xcoord, ycoord, res)
    tile_name = '_'.join(str(v) for v in (int(ty), int(tx), res))
    return ('http://openconnecto.me/ocp/catmaid/bock11/image/xy/'
            + str(z) + '/' + tile_name + '.png')
# example: tile URL at resolution 0 for data coords (2000, 2000), image z-slice 2917
print get_image_url1(2000, 2000, 0, 2917)
Now we just need to figure out the z-axis. The z-values in the image data go from 2917 to 4156, which is a range of
In [5]:
# span of z-slices in the image data (4156 - 2917)
print (2917-4156)*-1
So it seems that the z-values in the data correspond approximately to the z-values in the image data, other than a translation of 2917. So let's redefine our function, and put it in one code block so it's easy for other people to use...
In [6]:
# NOTE: you can copy just this block into your notebook to use the get_image_url() function
# full image dimensions in pixels
xPix = 135424
yPix = 119808
# pixels per bin: 108 bins along x, 86 along y
xPixPerBin = xPix/108.0
yPixPerBin = yPix/86.0
max_tiles_x = 264 # found via inspection of viz html/JS code
max_tiles_y = 233 # found via inspection of viz html/JS code
def coords_to_px(xcoord, ycoord):
    """Map (x, y) data coordinates to image-pixel coordinates.

    Bins are 39 data units wide and span (xPixPerBin, yPixPerBin) pixels each.
    """
    scaled = np.array([xcoord, ycoord], dtype='float') / 39.0
    return (scaled[0] * xPixPerBin, scaled[1] * yPixPerBin)
def get_tilenums_at_res(xcoord, ycoord, res):
    """Return (x, y) tile indices for the given data coordinates and resolution.

    A tile spans 512 * 2**res pixels per side. Unlike the earlier version of
    this function, indices are rounded to the nearest tile (np.round), not
    floored.
    """
    px, py = coords_to_px(xcoord, ycoord)
    # The original special-cased res == 0, but 512 * 2**0 == 512 makes that
    # branch identical to the general case, so one expression covers both.
    tile_span = 512 * (2 ** res)
    return np.round(float(px) / tile_span), np.round(float(py) / tile_span)
def get_image_url(xcoord, ycoord, zcoord, res=1):
    """
    params:
    - xcoord, ycoord, zcoord all in terms of coordinates in original data file
    - res = image resolution, default = 1
      since 1024x1024 pixels, approx. the size of a bin--i think
    returns: (string) url of image
    """
    # image z-slices start at 2917 while data z-values start near 0
    z = int(zcoord + 2917)
    tx, ty = get_tilenums_at_res(int(xcoord), int(ycoord), res)
    tx, ty = int(tx), int(ty)
    # clamp tile indices to the maximum available at this resolution
    if tx > max_tiles_x // (2 ** res):
        tx = max_tiles_x // (2 ** res)
    if ty > max_tiles_y // (2 ** res):
        ty = max_tiles_y // (2 ** res)
    # tile filename layout is <y>_<x>_<res>.png
    tile_name = '_'.join(str(v) for v in (ty, tx, res))
    return ('http://openconnecto.me/ocp/catmaid/bock11/image/xy/'
            + str(z) + '/' + tile_name + '.png')
In [7]:
from IPython.display import Image, HTML, display
disp_dim = {'width': 200, 'height': 200} # just for quickly setting image width/height
# locate the bin with the maximum synapse count (last column)
m = np.max(data[:, -1])
a = np.where(data[:, -1]==m)
# NOTE(review): list(*...) relies on the fancy index returning a single row
# (i.e. a unique maximum); with several maximal rows this would misbehave -- confirm
args = list(*data[a, (0, 1, 2)])+[0]
imgs = []
# fetch that bin's tile at resolutions 0, 1, 2
for r in range(3):
args[-1] = r
u = get_image_url(*args)
print u
imgs.append(Image(url=u, **disp_dim))
display(*imgs)
In [8]:
# replace column 3 with synapse density: synapses (col 4) / unmasked (col 3)
dens_data = np.copy(data).astype(float)
dens_data = dens_data[np.where(dens_data[:,3] != 0)] # drop rows that would divide by zero
dens_data[:, 3] = dens_data[:, 4]/dens_data[:, 3]
dens_data = dens_data[:,:-1] # drop the raw synapse column; density is now last
print np.average(dens_data[:,-1])
# sort by density, then collect tile URLs for the 9 lowest- and 9 highest-density bins
a = np.argsort(dens_data[:, -1])
urlsMin, urlsMax = zip(*[(get_image_url(*dens_data[a[i],:-1]),
get_image_url(*dens_data[a[-1-i],:-1]))
for i in range(9)])
tagsMin = ''.join(["<img style='width: 80px; margin: 0px; padding-right: 3px; float: left;' src='%s' />" % str(u)
for u in urlsMin ])
tagsMin += '<br> <br>'
tagsMax = ''.join(["<img style='width: 80px; margin: 0px; padding-right: 3px; float: left;' src='%s' />" % str(u)
for u in urlsMax ])
display(HTML(tagsMin))
display(HTML(tagsMax))
Black regions most likely are masked regions, thus it is actually not surprising to see large amounts of masking in both high and low density areas (since low unmasked increases density, but at the same time lowers synaptic probability). Furthermore, note that the data is binned across many z-slices, while here we are only looking at one z-slice at a time, thus it is plausible for a high density bin to have an entire slice masked. This also indicates that it would be beneficial to write a function that computes the pixel-wise average across z-slices for a bin and returns the corresponding image. We can also look only at the more 'cleaned' data, as many boundary points are likely to be picked up here.
In [9]:
# get the clean data
# x/y bounds (data coordinates) of the well-imaged interior region
x_bounds = (409, 3529)
y_bounds = (1564, 3124)
def check_in_bounds(row, x_bounds, y_bounds):
    """Return True iff row's x (col 0) and y (col 1) lie within the given
    inclusive bounds and its unmasked count (col 3) is nonzero."""
    in_x = x_bounds[0] <= row[0] <= x_bounds[1]
    in_y = y_bounds[0] <= row[1] <= y_bounds[1]
    return bool(in_x and in_y and row[3] != 0)
# keep only rows inside the clean region with nonzero unmasked
indices_in_bound, = np.where(np.apply_along_axis(check_in_bounds, 1, csv, x_bounds, y_bounds))
data_clean = csv[indices_in_bound]
# NOTE(review): everything below duplicates cell In[8] on the cleaned data;
# a shared helper function would avoid the copy-paste
dens_data = np.copy(data_clean).astype(float)
dens_data = dens_data[np.where(dens_data[:,3] != 0)]
# density = synapses / unmasked
dens_data[:, 3] = dens_data[:, 4]/dens_data[:, 3]
dens_data = dens_data[:,:-1]
print np.average(dens_data[:,-1])
a = np.argsort(dens_data[:, -1])
urlsMin, urlsMax = zip(*[(get_image_url(*dens_data[a[i],:-1]),
get_image_url(*dens_data[a[-1-i],:-1]))
for i in range(9)])
tagsMin = ''.join(["<img style='width: 80px; margin: 0px; padding-right: 3px; float: left;' src='%s' />" % str(u)
for u in urlsMin ])
tagsMin += '<br> <br>'
tagsMax = ''.join(["<img style='width: 80px; margin: 0px; padding-right: 3px; float: left;' src='%s' />" % str(u)
for u in urlsMax ])
display(HTML(tagsMin))
display(HTML(tagsMax))
How about regions with high unmasked, and low synapses, and high unmasked with high synapses?
In [10]:
from itertools import count
avg_unmasked = np.average(data[:,3])
high_unmasked = data[np.where(data[:,3] > avg_unmasked)]
low_synapses = []
for s in count():
low_synapses = high_unmasked[np.where(high_unmasked[:,-1]==s)]
if low_synapses.size > 0:
print s
break
d_low = low_synapses[0]
print d_low
imgs = []
imgs.append(Image(url=get_image_url(*d_low[:3]), **disp_dim))
max_s = np.max(data[:, 4])
print max_s
high_synapses = []
for s in range(max_s, 0, -1):
high_synapses = high_unmasked[np.where(high_unmasked[:,-1]==s)]
if high_unmasked.size > 0:
print s
break
d_high = high_synapses[0]
print d_high
imgs.append(Image(url=get_image_url(*d_high[:3]), **disp_dim))
display(*imgs)
In [11]:
# zoom in a resolution
# re-display the low- and high-synapse tiles from the previous cell at res=0
display(Image(url=get_image_url(*d_low[:3], res=0), **disp_dim),
Image(url=get_image_url(*d_high[:3], res=0), **disp_dim))
Since it appears that the coordinates in our data correspond to the center of a bin, we can see that there was no cut applied on the x-axis, since data starts at 19 and bin "length" is 40, and similarly no cut along z since data starts at 55 and bin "depth" is 110. But along the y-axis data starts at 1369; floor(1369/40)=34, meaning the first 34 bins along the y-axis were cut across all data.
In [12]:
# fairly arbitrarily, look at midpoint for x and z
midx, midz = [np.median(np.unique(data[:, i])) for i in [0,2]]
# smallest y present in the data (the y-axis was cut during binning)
y = np.min(data[:, 1])
print midx, y, midz
Image(url=get_image_url(midx, y, midz, 2), **disp_dim)
Out[12]:
In [13]:
# nothing apparently notable, lets view across entire x-axis
from itertools import groupby
# groupby collapses consecutive duplicate URLs (several x-bins share a tile at res 3)
urls = [k for k,_ in groupby(
[get_image_url(x, y, midz, 3)
for x in np.sort(np.unique(data[:, 0]))])]
imgTags = ''.join( ["<img style='width: 20px; margin: 0px; padding-bottom: 3px; float: left;' src='%s' />" % str(u)
for u in urls ])
display(HTML(imgTags))
# y value below cutoff
# (39*10 is a y coordinate below the smallest y present in the data)
urls = [k for k,_ in groupby(
[get_image_url(x, 39*10, midz, 3)
for x in np.sort(np.unique(data[:, 0]))])]
imgTags = ''.join( ["<img style='width: 20px; margin: 0px; padding-bottom: 3px; float: left;' src='%s' />" % str(u)
for u in urls ])
display(HTML(imgTags))
First row shows where the data was sliced; second is somewhere before it was sliced (that is, data not included in the set)... Since these black regions probably correspond to regions that are heavily masked, perhaps this is why the data was split here?
In [14]:
# what bins along this y value have unmasked = 0?
for row in data[np.where(data[:, 1] == y)]:
if row[3] == 0: print row
# average unmasked two bin-widths further along y, for comparison
print np.average(data[np.where(data[:, 1] == y+39*2), 3])
In [15]:
# do same thing for z = 1165, since we observe this is where all the unmasked = 0 bins occur
# (reuses groupby imported in cell In[13] and y/disp state from earlier cells)
urls = [k for k,_ in groupby(
[get_image_url(x, y, 1165, 3)
for x in np.sort(np.unique(data[:, 0]))])]
imgTags = ''.join( ["<img style='width: 20px; margin: 0px; padding-bottom: 3px; float: left;' src='%s' />" % str(u)
for u in urls ])
display(HTML(imgTags))
In [16]:
# indices of all rows with unmasked == 0 (reused by the next few cells)
a, = np.where(data[:, 3] == 0)
# first unmasked = 0
args = list(data[a[0], (0, 1, 2)])
u = get_image_url(*args)
print u
Image(url=u, **disp_dim)
Out[16]:
In [17]:
# middle one
# NOTE: `a` (indices of unmasked == 0 rows) comes from the previous cell
args = list(data[a[a.size//2], (0, 1, 2)])
u = get_image_url(*args)
print u
Image(url=u, **disp_dim)
Out[17]:
In [18]:
# zoom out middle
# append res=7 to the (x, y, z) args from the previous cell for a wide view
args += [7]
u = get_image_url(*args)
print u
Image(url=u, **disp_dim)
Out[18]:
Looks like regions where unmasked = 0 correspond to edges.
In [19]:
# last
# final unmasked == 0 bin (indices `a` come from cell In[16])
args = list(data[a[-1], (0, 1, 2)])
u = get_image_url(*args)
print u
Image(url=u, **disp_dim)
Out[19]:
Seems to be working... Another sanity check: if at resolution 0 the tiles returned span 512 pixels per side, and that doubles as resolution increases, then the number of pixels spanned at resolution r should be 512x2^r. Therefore resolution 8 should return approximately the entire cross section, since 512x2^8=131072, which is approximately the total number of pixels along the x-axis (135424). Let's test this out by grabbing the first z-layer's tile at the origin for resolution 8, i.e. the tile with url http://openconnecto.me/ocp/catmaid/bock11/image/xy/2917/0_0_8.png.
Seems to be correct.
TODO, i.e. stuff I thought I had time for this week:
In [ ]: