In [1]:
#Clean syn density data
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
%matplotlib inline
import urllib2
import numpy as np

np.set_printoptions(precision=3, suppress=True)

# Source CSV columns: x, y, z, unmasked-voxel count, synapse count.
url = ('https://raw.githubusercontent.com/Upward-Spiral-Science'
       '/data/master/syn-density/output.csv')
data = urllib2.urlopen(url)
csv = np.genfromtxt(data, delimiter=",")[1:] # don't want first row (labels)

# Usable window of the volume:
# x in [409, 3529] (grid starts at 19), y in [1564, 3124] (grid starts at 1369).
# Also drop bins whose unmasked count is zero (division below would be 0/0).
in_bounds = ((csv[:, 3] != 0) &
             (csv[:, 0] >= 409) & (csv[:, 0] <= 3529) &
             (csv[:, 1] >= 1564) & (csv[:, 1] <= 3124))
csv_clean = csv[in_bounds]

# BUG FIX: the original bound csv_clean_no_ratio to the SAME array object, so
# the in-place ratio computation below clobbered the "no ratio" copy as well.
csv_clean_no_ratio = csv_clean.copy()

# Replace raw synapse counts with density: synapses/unmasked, scaled to a
# full 64**3-voxel bin.
csv_clean[:, 4] = np.divide(csv_clean[:, 4], csv_clean[:, 3])
csv_clean[:, 4] = csv_clean[:, 4] * (64**3)

1) Histograms (Jay, Bijan)

1.1 Plotting histograms of cleaned data (Bijan)


In [2]:
def check_condition(row):
    """Return True when the row's unmasked count (column 3) is nonzero."""
    return row[3] != 0

# BUG FIX: removed leftover debug output (print 'ayylmao').
# Keep only rows whose unmasked count is nonzero, then histogram the
# scaled density (synapses/unmasked per full 64**3-voxel bin).
a = np.apply_along_axis(check_condition, 1, csv)
a = np.where(a == True)[0]
nonZeroMask = csv[a, :]

synDividedMask = np.divide(nonZeroMask[:,4], nonZeroMask[:,3])
synDividedMask = synDividedMask * (64**3)
# Reassemble as rows of [x, y, z, density].
accurateData = np.vstack((nonZeroMask[:,0], nonZeroMask[:,1],
                          nonZeroMask[:,2], synDividedMask)).T
plt.hist(accurateData[:,3], bins=100)
plt.show()



In [3]:
# Restrict to the analysis window: x in [409, 3529], y in [1564, 3124]
# (successive filters expressed as one combined boolean mask).
in_window = ((accurateData[:, 0] >= 409) & (accurateData[:, 0] <= 3529) &
             (accurateData[:, 1] >= 1564) & (accurateData[:, 1] <= 3124))
cleaned = accurateData[in_window]

plt.hist(cleaned[:, 3], bins=100)
plt.show()


The histogram suggests the synapse density (synapses/unmasked, scaled per bin) is concentrated around a mode of roughly 340.

1.2) What is the average synapse density per voxel? How does it compare to average and min/max synapse density per bin? (Jay)

Note the 2 large spikes in the histogram.


In [4]:
# chopping data based on thresholds on x and y coordinates
# Inclusive bounds of the usable window of the volume (bin coordinates).
x_bounds = (409, 3529)
y_bounds = (1564, 3124)

def check_in_bounds(row, x_bounds, y_bounds):
    """True iff the row lies inside the x/y window and has a nonzero
    unmasked count (column 3)."""
    inside_x = x_bounds[0] <= row[0] <= x_bounds[1]
    inside_y = y_bounds[0] <= row[1] <= y_bounds[1]
    return inside_x and inside_y and row[3] != 0

indices_in_bound, = np.where(np.apply_along_axis(check_in_bounds, 1, csv, x_bounds, y_bounds))
data_thresholded = csv[indices_in_bound]
n = data_thresholded.shape[0]

total_unmasked = np.sum(data_thresholded[:, 3])
total_syn = np.sum(data_thresholded[:, 4])
print "avg density per voxel: ", total_syn/total_unmasked
a = np.apply_along_axis(lambda x:x[4]/x[3], 1, data_thresholded)
print "average per bin: ", np.average(a), ", std dev: ", np.std(a)
print "max/min bin density: ", np.max(a), ", ", np.min(a)
print np.sum(a)
hist_n, bins, _ = plt.hist(a, 5000)
plt.xlim(-.0001, .0035)
plt.show()


avg density per voxel:  0.0012107238853
average per bin:  0.00115002980202 , std dev:  0.000406563246763
max/min bin density:  0.00338020281217 ,  0.0
41.4424739456

1.3) What fraction of the samples have 0 density? How much data was thrown out due to unmasked being 0? (Jay)


In [5]:
print "fraction: ", hist_n[0]/len(a)

ux = np.unique(data_thresholded[:, 0]).shape[0]
uy = np.unique(data_thresholded[:, 1]).shape[0]
uz = np.unique(data_thresholded[:, 2]).shape[0]
exp = ux*uy*uz
actual = data_thresholded.shape[0]
print "actual: %d, expected: %d, difference: %d" % (actual, exp, exp-actual)


fraction:  0.0133477633478
actual: 36036, expected: 36531, difference: 495

2) Is joint distribution uniform? (Jay)


In [6]:
# chi squared test on all bins
import scipy.stats as stats

def synapses_over_unmasked(row):
    """Map a raw row to [x, y, z, density], where density is
    synapses/unmasked scaled to a full 64**3-voxel bin."""
    density = (row[4] / row[3]) * (64 ** 3)
    return [row[0], row[1], row[2], density]

syn_unmasked = np.apply_along_axis(synapses_over_unmasked, 1, data_thresholded)
# divide synapses/unmasked by std_dev
# syn_normalized = np.apply_along_axis(normalize_syn, 1, syn_unmasked, 
#                                     np.mean(syn_unmasked[:,-1]), np.std(syn_unmasked[:,-1]))
syn_normalized = syn_unmasked
sum_syn_norm = np.sum(syn_normalized[:, 3])
avg_syn_norm = (sum_syn_norm/n)*np.ones((n))
syn_norm_1col = syn_normalized[:, -1]
print avg_syn_norm
print syn_norm_1col
print stats.chisquare(syn_norm_1col, avg_syn_norm)


[ 301.473  301.473  301.473 ...,  301.473  301.473  301.473]
[ 400.358  420.178  425.628 ...,  242.403  157.405  246.08 ]
Power_divergenceResult(statistic=1357761.9606434423, pvalue=0.0)

3) Are the marginal distributions uniform? (Jay)


In [7]:
def marginalize_along_axis(axis):
    """Sum the density column of the global `syn_normalized` over each
    unique coordinate value along `axis`; values follow np.unique order."""
    unique_vals = np.unique(syn_normalized[:, axis])
    totals = dict((v, 0.0) for v in unique_vals)
    for row in syn_normalized:
        totals[row[axis]] += row[-1]
    return np.array([totals[v] for v in unique_vals])

for axis in range(3):
    marginalized_data = marginalize_along_axis(axis)
    n = len(np.unique(syn_normalized[:, axis]))
    print "n=", n
    avg = sum_syn_norm/n
    avg_vec = np.ones((n))*avg
    print "axis = ", axis, "chisquare test: ", stats.chisquare(marginalized_data, avg_vec)


n= 81
axis =  0 chisquare test:  Power_divergenceResult(statistic=19641.411877476548, pvalue=0.0)
n= 41
axis =  1 chisquare test:  Power_divergenceResult(statistic=340888.29730870121, pvalue=0.0)
n= 11
axis =  2 chisquare test:  Power_divergenceResult(statistic=89131.566812296718, pvalue=0.0)

4) Data Visualization (Emily)


In [8]:
import matplotlib.pyplot as plt
from matplotlib import cm

# One filled-contour panel per coordinate pair (each with the density column).
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
for ax, cols in zip((ax1, ax2, ax3), ((0, 1, 4), (0, 2, 4), (1, 2, 4))):
    ax.contourf(csv_clean[:, cols].T, 20)
plt.show()


5) 2D Kernel Density Estimation (Emily)

A large portion of the directional density seems to be along the x-axis.


In [9]:
import scipy.stats as stats

def plot_kde(col, axis_label):
    """Scatter (coordinate, density) pairs colored by a 2-D Gaussian KDE.

    col: index of the coordinate column in csv_clean (0=x, 1=y, 2=z);
    column 4 is the scaled density.
    """
    pairs = csv_clean[:, (col, 4)].T   # shape (2, n): coordinate row, density row
    kde = stats.gaussian_kde(pairs)
    density = kde(pairs)
    plt.figure(figsize=(12, 6))
    coord, syn = pairs
    plt.scatter(coord, syn, c=density)
    plt.xlabel(axis_label)
    plt.ylabel('synapses / unmasked (scaled)')
    plt.show()

# BUG FIXES vs. the original three copy-pasted cells:
#  - the y- and z-panels scattered the stale `x` coordinates;
#  - the "z" KDE reused column 1 (y) instead of column 2;
#  - the random-sampling code was broken (it indexed the 2-row transposed
#    array with row-permutation indices) and its result was never used,
#    so it has been dropped.
plot_kde(0, 'x coordinate')
plot_kde(1, 'y coordinate')
plot_kde(2, 'z coordinate')


We can actually generate a rough estimate of direction of increasing density by looking at change in average density across specific axes. This is a rough estimate but should help analysis in the future.

5.1) Rough estimate of change in density direction. (Bijan)


In [10]:
#looking at change in synapse density across halves of x,y,z
meanx = np.mean(cleaned[:,0])
def sortx(row):
    if row[0] <= meanx:
        return False
    return True
meany = np.mean(cleaned[:,1])
def sorty(row):
    if row[0] <= meany:
        return False
    return True
meanz = np.mean(cleaned[:,2])
def sortz(row):
    if row[0] <= meanz:
        return False
    return True
a = np.apply_along_axis(sortx, 1, cleaned)
a = np.where(a == False)[0]
smallx = csv[a, 3]
bigx = csv[~a, 3]

a = np.apply_along_axis(sorty, 1, cleaned)
a = np.where(a == False)[0]
smally = csv[a, 3]
bigy = csv[~a, 3]

a = np.apply_along_axis(sortz, 1, cleaned)
a = np.where(a == False)[0]
smallz = csv[a, 3]
bigz = csv[~a, 3]

#now with data halved, we calculate denisty differeces

xdiff = np.mean(bigx) - np.mean(smallx)
ydiff = np.mean(bigy) - np.mean(smally)
zdiff = np.mean(bigz) - np.mean(smallz)
changeDens = [xdiff, ydiff, zdiff]
changeDens = np.divide(changeDens,zdiff)
print changeDens

x, y, z  = np.meshgrid(np.arange(1, 9, 4), np.arange(1, 17, 8), np.arange(1, 3))
fig = plt.figure(figsize=(10, 10))
ax = fig.gca(projection='3d')
ax.quiver(x, y, z, -xdiff, -ydiff, -zdiff, length=2, color="Tomato", alpha=.8, 
          arrow_length_ratio=.2)
ax.view_init(elev=18, azim=-30) 
ax.dist = 8
plt.show()


[-7.359 -5.85   1.   ]