In [2]:
# Clean data, chopping off edges
import numpy as np
import urllib2
import matplotlib.pyplot as plt
%matplotlib inline
url = ('https://raw.githubusercontent.com/Upward-Spiral-Science'
'/data/master/syn-density/output.csv')
data = urllib2.urlopen(url)
csv = np.genfromtxt(data, delimiter=",")[1:] # don't want first row (labels)
#x_bounds = (409, 3529), starts at 19
#y_bounds = (1564, 3124), starts at 1369
csv_clean = csv[np.logical_not(csv[:,3] == 0)] # get rid of unmasked values of zero
csv_clean = csv_clean[csv_clean[:,0] >= 409]
csv_clean = csv_clean[csv_clean[:,0] <= 3529]
csv_clean = csv_clean[csv_clean[:,1] >= 1564]
csv_clean = csv_clean[csv_clean[:,1] <= 3124]
csv_clean_no_ratio = csv_clean
csv_clean[:,4] = np.divide(csv_clean[:,4],csv_clean[:,3])
csv_clean[:,4] = csv_clean[:,4]*(64**3)
In [21]:
plt.figure()
plt.figure(figsize=(28,7))
# x-direction
# sum up y-z plane at each x
plt.subplot(131)
unique_x = np.unique(csv_clean[:,0])
sum_x = [0]*len(unique_x)
i = 0
for x in unique_x:
sum_x[i] = np.sum(csv_clean[csv_clean[:,0] == x][4])*0.0001
i = i+1
plt.bar(unique_x, sum_x, 1)
plt.xlim(450, 3600)
plt.ylabel('density in synapses/voxel 1e+4',fontsize=20)
plt.xlabel('x-coordinate',fontsize=20)
plt.title('Total Density across Each X-Layer',fontsize=20)
# y-direction
# sum up x-z plane at each y
plt.subplot(132)
unique_y = np.unique(csv_clean[:,1])
sum_y = [0]*len(unique_y)
i = 0
for y in unique_y:
sum_y[i] = np.sum(csv_clean[csv_clean[:,1] == y][4])*.0001
i = i+1
plt.bar(unique_y, sum_y, 1)
plt.xlim(1570, 3190)
plt.ylabel('density in synapses/voxel 1e+4',fontsize=20)
plt.xlabel('y-coordinate',fontsize=20)
plt.title('Total Density across Each Y-Layer',fontsize=20)
# z-direction
# sum up x-y plane at each z
plt.subplot(133)
unique_z = np.unique(csv_clean[:,2])
sum_z = [0]*len(unique_z)
i = 0
for z in unique_z:
sum_z[i] = np.sum(csv_clean[csv_clean[:,2] == z][4])*.0001
i = i+1
plt.bar(unique_z, sum_z, 1)
plt.ylabel('density in synapses/voxel 1e+4',fontsize=20)
plt.xlabel('z-coordinate',fontsize=20)
plt.title('Total Density across Each Z-Layer',fontsize=20)
Out[21]:
In [129]:
# Divide by middle z-layer
from scipy.signal import argrelextrema as relext
x_minima = relext(np.array(sum_x), np.less)
print 'X minima ', x_minima
delimeter = np.median(csv_clean[:,2])
left_volume = csv_clean[csv_clean[:,2] <= delimeter]
sum_left = [0]*len(unique_x)
i = 0
for x in unique_x:
sum_left[i] = np.sum(left_volume[left_volume[:,0] == x][:,4])*.0001
i = i+1
left_minima = relext(np.array(sum_left),np.less)
right_volume = csv_clean[csv_clean[:,2] > delimeter]
sum_right = [0]*len(unique_x)
i = 0
for x in unique_x:
sum_right[i] = np.sum(right_volume[right_volume[:,0] == x][:,4])*.0001
i = i+1
right_minima = relext(np.array(sum_right),np.less)
print "Left minima: ", left_minima
print "Right minima: ", right_minima
Since from 1, it seems as though layering may be present in the z-direction.
In [103]:
import sklearn.mixture as mixture
# Check for uniformity in clusters along x-direction
plt.figure(figsize=(7,7))
divisions = np.unique(csv_clean[:,2])
# Randomly Sample
samples = 10000
perm = np.random.permutation(xrange(1, len(csv_clean[:])))
csv_clean_sample = csv_clean[perm[:samples]]
for d in divisions:
z_layer = csv_clean_sample[csv_clean_sample[:,2] == d]
#Run GMM on layer
print 'Running GMM on layer ' + str(d)
max_clusters = 35
bic = np.array([])
i = np.array(range(1, max_clusters))
for idx in range(1, max_clusters):
#print "Fitting and evaluating model with " + str(idx) + " clusters."
gmm = mixture.GMM(n_components=idx,n_iter=1000,covariance_type='diag', random_state=1)
gmm.fit(z_layer[:,(0,1,4)])
bic = np.append(bic, gmm.bic(z_layer[:,(0,1,4)]))
#print bic
plt.plot(i, 1.0/bic)
plt.hold(True)
plt.title('BIC for each Z-layer')
plt.ylabel('score')
plt.xlabel('number of clusters')
plt.legend(divisions)
plt.show()
From above, the optimal number of clusters ranges across z-layers. The overall trend looks like the elbows are around 6 clusters.
In [142]:
import sklearn.cluster as cluster
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
for d in divisions:
z_layer = csv_clean[csv_clean[:,2] == d]
#Run GMM on layer
print 'Running GMM on layer ' + str(d)
print "Fitting and evaluating model with 6 clusters."
gmm = mixture.GMM(n_components=6,n_iter=1000,covariance_type='diag', random_state=1)
gmm.fit(z_layer[:,(0,1,4)])
center1 = [gmm.means_[0][0],gmm.means_[0][1],gmm.means_[0][2]]
center2 = [gmm.means_[1][0],gmm.means_[1][1],gmm.means_[1][2]]
center3 = [gmm.means_[2][0],gmm.means_[2][1],gmm.means_[2][2]]
ax.scatter(center1, center2, center3, marker='o', s=100)
plt.hold(True)
plt.title('Cluster centers for every Z-layer')
plt.ylabel('y')
plt.xlabel('x')
ax.set_zlabel('z')
plt.show()
Histogram from last time: average density 0.00115002980202 , std dev 0.000406563246763
In [3]:
from mpl_toolkits.mplot3d import axes3d
# chopping data based on thresholds on x and y coordinates
x_bounds = (409, 3529)
y_bounds = (1564, 3124)
def check_in_bounds(row, x_bounds, y_bounds):
if row[0] < x_bounds[0] or row[0] > x_bounds[1]:
return False
if row[1] < y_bounds[0] or row[1] > y_bounds[1]:
return False
if row[3] == 0:
return False
return True
indices_in_bound, = np.where(np.apply_along_axis(check_in_bounds, 1, csv, x_bounds, y_bounds))
data_thresholded = csv[indices_in_bound]
print data_thresholded.shape
n = data_thresholded.shape[0]
# Original
total_unmasked = np.sum(data_thresholded[:, 3])
total_syn = np.sum(data_thresholded[:, 4])
a = np.apply_along_axis(lambda x:x[4]/x[3], 1, data_thresholded)
#hist_n, bins, _ = plt.hist(a, 5000)
#plt.xlim(-.0001, .0035)
#plt.show()
# Spike
spike = a[np.logical_and(a <= 0.0015, a >= 0.0012)]
print "Points in spike: ", len(spike)
print "Average Density: ", np.mean(spike)
print "Std Deviation: ", np.std(spike)
# Histogram
hist_n, bins, _ = plt.hist(spike, 2000)
plt.title('Histogram of Synaptic Density')
plt.xlabel('Synaptic Density (syn/voxel)')
plt.ylabel('frequency')
# Scatter plot
data_thresholded[:,4] = a
spike_coords = data_thresholded[np.logical_and(data_thresholded[:,4] <= 0.0015, data_thresholded[:,4] >= 0.0012)]
# Randomly Sample
samples = 500
perm = np.random.permutation(xrange(1, len(spike_coords[:])))
spike_sample = spike_coords[perm[:samples]]
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.view_init()
ax.dist = 10 # distance
ax.scatter(
spike_sample[:, 0], spike_sample[:, 1], spike_sample[:, 2], # data
s = [10**(n*1500) for n in spike_sample[:,4]]
)
ax.set_xlim(np.min(spike_sample[:,0]),np.max(spike_sample[:,0]))
ax.set_ylim(np.min(spike_sample[:,1]),np.max(spike_sample[:,1]))
ax.set_zlim(np.min(spike_sample[:,2]),np.max(spike_sample[:,2]))
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.title('Random Sample of Points in Spike')
plt.show()
In [163]:
fig = plt.figure(figsize=(10,7))
ax = fig.gca(projection='3d')
for d in divisions:
z_layer = csv_clean[csv_clean[:,2] == d]
#Run GMM on layer
gmm = mixture.GMM(n_components=6,n_iter=1000,covariance_type='diag', random_state=1)
gmm.fit(z_layer[:,(0,1,4)])
center1 = [gmm.means_[0][0],gmm.means_[0][1],gmm.means_[0][2]]
center2 = [gmm.means_[1][0],gmm.means_[1][1],gmm.means_[1][2]]
center3 = [gmm.means_[2][0],gmm.means_[2][1],gmm.means_[2][2]]
ax.scatter(center1, center2, center3, marker='o', s=100)
plt.hold(True)
plt.hold(True)
resultant_estimate = np.array([-2.578, -1.369, -1])
print resultant_estimate.shape
x, y, z = np.meshgrid(np.arange(-500, 3500, 1000), np.arange(0, 3500, 1000), np.arange(0, 3000, 1000))
ax = fig.gca(projection='3d')
ax.quiver(x, y, z, resultant_estimate[0], resultant_estimate[1], resultant_estimate[2], length=500, color="Tomato", alpha=.8,
arrow_length_ratio=.2)
ax.view_init(elev=18, azim=-30)
plt.title('Cluster centers for each Z-layer')
plt.ylabel('y')
plt.xlabel('x')
ax.set_zlabel('z')
ax.set_xticks([-1000, 0, 1000, 2000, 3000])
ax.set_yticks([0, 1000, 2000, 3000])
ax.set_zticks([0, 1000, 2000, 3000])
plt.show()
Our direction estimate seems to make sense as density seems to get higher as we move in the -yz direction.
In [ ]: