In [1]:
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import urllib2
import scipy.stats as stats
np.set_printoptions(precision=3, suppress=True)
url = ('https://raw.githubusercontent.com/Upward-Spiral-Science'
'/data/master/syn-density/output.csv')
data = urllib2.urlopen(url)
csv = np.genfromtxt(data, delimiter=",")[1:] # don't want first row (labels)
# chopping data based on thresholds on x and y coordinates
x_bounds = (409, 3529)
y_bounds = (1564, 3124)
def check_in_bounds(row, x_bounds, y_bounds):
if row[0] < x_bounds[0] or row[0] > x_bounds[1]:
return False
if row[1] < y_bounds[0] or row[1] > y_bounds[1]:
return False
if row[3] == 0:
return False
return True
indices_in_bound, = np.where(np.apply_along_axis(check_in_bounds, 1, csv,
x_bounds, y_bounds))
data_thresholded = csv[indices_in_bound]
n = data_thresholded.shape[0]
def synapses_over_unmasked(row):
s = (row[4]/row[3])*(64**3)
return [row[0], row[1], row[2], s]
syn_unmasked = np.apply_along_axis(synapses_over_unmasked, 1, data_thresholded)
syn_normalized = syn_unmasked
print 'end setup'
In [2]:
# syn_unmasked_T = syn_unmasked.values.T.tolist()
# columns = [syn_unmasked[i] for i in [4]]
plt.boxplot(syn_unmasked[:,3], 0, 'gD')
plt.xticks([1], ['Set'])
plt.ylabel('Density Distribution')
plt.title('Density Distrubution Boxplot')
plt.show()
In [3]:
figure = plt.figure()
plt.hist(data_thresholded[:,4],5000)
plt.title('Histogram of Synapses in Brain Sample')
plt.xlabel('Synapses')
plt.ylabel('frequency')
Out[3]:
Since we don't see the spike in the histogram of synapses, the spike may be some artifact of the unmasked value. Let's take a look!
In [4]:
plt.hist(data_thresholded[:,3],5000)
plt.title('Histogram of Unmasked Values')
plt.xlabel('unmasked')
plt.ylabel('frequency')
Out[4]:
In [5]:
# Spike
a = np.apply_along_axis(lambda x:x[4]/x[3], 1, data_thresholded)
spike = a[np.logical_and(a <= 0.0015, a >= 0.0012)]
print "Average Density: ", np.mean(spike)
print "Std Deviation: ", np.std(spike)
# Histogram
n, bins, _ = plt.hist(spike, 2000)
plt.title('Histogram of Synaptic Density')
plt.xlabel('Synaptic Density (syn/voxel)')
plt.ylabel('frequency')
bin_max = np.where(n == n.max())
print 'maxbin', bins[bin_max][0]
bin_width = bins[1]-bins[0]
syn_normalized[:,3] = syn_normalized[:,3]/(64**3)
spike = syn_normalized[np.logical_and(syn_normalized[:,3] <= 0.00131489435301+bin_width, syn_normalized[:,3] >= 0.00131489435301-bin_width)]
print "There are ", len(spike), " points in the 'spike'"
In [7]:
spike_thres = data_thresholded[np.logical_and(syn_normalized[:,3] <= 0.00131489435301+bin_width, syn_normalized[:,3] >= 0.00131489435301-bin_width)]
print spike_thres
In [95]:
import math
fig, ax = plt.subplots(1,2,sharey = True, figsize=(20,5))
weights = np.ones_like(spike_thres[:,3])/len(spike_thres[:,3])
weights2 = np.ones_like(data_thresholded[:,3])/len(data_thresholded[:,3])
ax[0].hist(data_thresholded[:,3], bins = 100, alpha = 0.5, weights = weights2, label = 'all data')
ax[0].hist(spike_thres[:,3], bins = 100, alpha = 0.5, weights = weights, label = 'spike')
ax[0].legend(loc='upper right')
ax[0].set_title('Histogram of Unmasked values in the Spike vs All Data')
weights = np.ones_like(spike_thres[:,4])/len(spike_thres[:,4])
weights2 = np.ones_like(data_thresholded[:,4])/len(data_thresholded[:,4])
ax[1].hist(data_thresholded[:,4], bins = 100, alpha = 0.5, weights = weights2, label = 'all data')
ax[1].hist(spike_thres[:,4], bins = 100, alpha = 0.5, weights = weights, label = 'spike')
ax[1].legend(loc='upper right')
ax[1].set_title('Histogram of Synapses in the Spike vs All Data')
plt.show()
Cluster 4 has relatively high density
In [48]:
import sklearn.mixture as mixture
n_clusters = 4
gmm = mixture.GMM(n_components=n_clusters, n_iter=1000, covariance_type='diag')
labels = gmm.fit_predict(syn_unmasked)
clusters = []
for l in range(n_clusters):
a = np.where(labels == l)
clusters.append(syn_unmasked[a,:])
print len(clusters)
print clusters[0].shape
counter = 0
indx = 0
indy = 0
for cluster in clusters:
s = cluster.shape
cluster = cluster.reshape((s[1], s[2]))
counter += 1
print
print'Working on cluster: ' + str(counter)
plt.boxplot(cluster[:,-1], 0, 'gD', showmeans=True)
plt.xticks([1])
plt.ylabel('Density')
plt.title('Boxplot of density \n at cluster = ' + str(int(counter)))
plt.show()
print "Done with cluster"
plt.show()
In [47]:
data_uniques, UIndex, UCounts = np.unique(syn_unmasked[:,2], return_index = True, return_counts = True)
'''
print 'uniques'
print 'index: ' + str(UIndex)
print 'counts: ' + str(UCounts)
print 'values: ' + str(data_uniques)
'''
fig, ax = plt.subplots(3,4,figsize=(10,20))
counter = 0
for i in np.unique(syn_unmasked[:,2]):
# print 'calcuating for z: ' + str(int(i))
def check_z(row):
if row[2] == i:
return True
return False
counter += 1
xind = (counter%3) - 1
yind = (counter%4) - 1
index_true = np.where(np.apply_along_axis(check_z, 1, syn_unmasked))
syn_uniqueZ = syn_unmasked[index_true]
ax[xind,yind].boxplot(syn_uniqueZ[:,3], 0, 'gD')
ax[xind,yind].set_xticks([1], i)
ax[xind,yind].set_ylabel('Density')
ax[xind,yind].set_title('Boxplot at \n z = ' + str(int(i)))
#print 'yind = %d, xind = %d' %(yind,xind)
#print i
ax[xind+1,yind+1].boxplot(syn_uniqueZ[:,3], 0, 'gD',showmeans=True)
ax[xind+1,yind+1].set_xticks([1], 'set')
ax[xind+1,yind+1].set_ylabel('Density')
ax[xind+1,yind+1].set_title('Boxplot for \n All Densities')
print "Density Distrubtion Boxplots:"
plt.tight_layout()
plt.show()
In [ ]:
In [ ]: