Group


In [1]:
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
%matplotlib inline 
import numpy as np
import urllib2
import scipy.stats as stats

np.set_printoptions(precision=3, suppress=True)
url = ('https://raw.githubusercontent.com/Upward-Spiral-Science'
       '/data/master/syn-density/output.csv')
data = urllib2.urlopen(url)
csv = np.genfromtxt(data, delimiter=",")[1:] # don't want first row (labels)

# chopping data based on thresholds on x and y coordinates
x_bounds = (409, 3529)
y_bounds = (1564, 3124)

def check_in_bounds(row, x_bounds, y_bounds):
    if row[0] < x_bounds[0] or row[0] > x_bounds[1]:
        return False
    if row[1] < y_bounds[0] or row[1] > y_bounds[1]:
        return False
    if row[3] == 0:
        return False
    
    return True

indices_in_bound, = np.where(np.apply_along_axis(check_in_bounds, 1, csv,
                                                 x_bounds, y_bounds))
data_thresholded = csv[indices_in_bound]
n = data_thresholded.shape[0]


def synapses_over_unmasked(row):
    s = (row[4]/row[3])*(64**3)
    return [row[0], row[1], row[2], s]

syn_unmasked = np.apply_along_axis(synapses_over_unmasked, 1, data_thresholded)
syn_normalized = syn_unmasked
print 'end setup'


end setup

1) Boxplot of General Density


In [2]:
# syn_unmasked_T = syn_unmasked.values.T.tolist()
# columns = [syn_unmasked[i] for i in [4]]

plt.boxplot(syn_unmasked[:,3], 0, 'gD')
plt.xticks([1], ['Set'])
plt.ylabel('Density Distribution')
plt.title('Density Distrubution Boxplot')
plt.show()


2) Is the spike noise? More evidence.

We saw from Emily's analysis that there is strong evidence against the spike being noise. If we see that the spike is noticeable in the histogram of synapses as well as the histogram of synapse density, we will gain even more evidence that the spike is noise.


In [3]:
figure = plt.figure()
plt.hist(data_thresholded[:,4],5000)
plt.title('Histogram of Synapses in Brain Sample')
plt.xlabel('Synapses')
plt.ylabel('frequency')


Out[3]:
<matplotlib.text.Text at 0x111fc41d0>

Since we don't see the spike in the histogram of synapses, the spike may be some artifact of the unmasked value. Let's take a look!

3) What is the spike? We still don't know.


In [4]:
plt.hist(data_thresholded[:,3],5000)
plt.title('Histogram of Unmasked Values')
plt.xlabel('unmasked')
plt.ylabel('frequency')


Out[4]:
<matplotlib.text.Text at 0x112a562d0>

4) Synapses and unmasked: Spike vs Whole Data Set


In [5]:
# Spike
a = np.apply_along_axis(lambda x:x[4]/x[3], 1, data_thresholded)
spike = a[np.logical_and(a <= 0.0015, a >= 0.0012)]
print "Average Density: ", np.mean(spike)
print "Std Deviation: ", np.std(spike)

# Histogram
n, bins, _ = plt.hist(spike, 2000)
plt.title('Histogram of Synaptic Density')
plt.xlabel('Synaptic Density (syn/voxel)')
plt.ylabel('frequency')

bin_max = np.where(n == n.max())

print 'maxbin', bins[bin_max][0]

bin_width = bins[1]-bins[0]
syn_normalized[:,3] = syn_normalized[:,3]/(64**3)
spike = syn_normalized[np.logical_and(syn_normalized[:,3] <= 0.00131489435301+bin_width, syn_normalized[:,3] >= 0.00131489435301-bin_width)]
print "There are ", len(spike), " points in the 'spike'"


Average Density:  0.00134070207006
Std Deviation:  8.46720771375e-05
maxbin 0.00131489435301
There are  136  points in the 'spike'

In [7]:
spike_thres = data_thresholded[np.logical_and(syn_normalized[:,3] <= 0.00131489435301+bin_width, syn_normalized[:,3] >= 0.00131489435301-bin_width)]
print spike_thres


[[    409.    2149.     166.  133848.     176.]
 [    409.    2539.     388.  129275.     170.]
 [    448.    1876.    1165.  149046.     196.]
 [    487.    2422.     943.  136890.     180.]
 [    526.    2383.     943.  136890.     180.]
 [    565.    3007.     610.  114075.     150.]
 [    643.    1993.     610.  138411.     182.]
 [    643.    2344.     277.  156663.     206.]
 [    760.    1876.    1165.  158176.     208.]
 [    760.    1915.      55.  153621.     202.]
 [    760.    2851.     499.  156663.     206.]
 [    799.    2422.    1054.  159705.     210.]
 [    838.    2071.    1054.  159705.     210.]
 [    877.    1564.     721.  144501.     190.]
 [    877.    2305.      55.  155142.     204.]
 [    916.    1759.     166.  138411.     182.]
 [    916.    2110.     832.  147537.     194.]
 [    916.    2422.     388.  149058.     196.]
 [    955.    2227.     277.  156663.     206.]
 [    955.    2461.     499.  158184.     208.]
 [    994.    2890.    1054.  158184.     208.]
 [   1033.    2773.     943.  153621.     202.]
 [   1072.    1564.     943.  153620.     202.]
 [   1072.    2656.     166.  133848.     176.]
 [   1072.    2773.     832.  146016.     192.]
 [   1111.    1993.     610.  139932.     184.]
 [   1111.    2344.    1165.  158182.     208.]
 [   1150.    1642.    1165.  159705.     210.]
 [   1150.    1954.     277.  158184.     208.]
 [   1150.    2617.     832.  146016.     192.]
 [   1150.    2812.     832.  146016.     192.]
 [   1189.    1876.     721.  149058.     196.]
 [   1189.    2539.     166.  133848.     176.]
 [   1189.    3007.     832.  144495.     190.]
 [   1228.    2071.     166.  135369.     178.]
 [   1228.    2227.    1054.  161226.     212.]
 [   1228.    2500.     943.  156663.     206.]
 [   1228.    2695.     499.  158184.     208.]
 [   1267.    1798.     721.  147554.     194.]
 [   1267.    2032.     721.  149058.     196.]
 [   1267.    2188.     943.  156663.     206.]
 [   1306.    2227.    1054.  161226.     212.]
 [   1345.    1603.     943.  156663.     206.]
 [   1345.    2773.     943.  156663.     206.]
 [   1384.    2383.     721.  147537.     194.]
 [   1384.    2500.     277.  156663.     206.]
 [   1423.    2110.      55.  156663.     206.]
 [   1423.    2110.     943.  156663.     206.]
 [   1423.    2383.     721.  147537.     194.]
 [   1462.    1681.     943.  156663.     206.]
 [   1540.    1603.     832.  147537.     194.]
 [   1540.    2032.     943.  156663.     206.]
 [   1540.    2149.    1165.  158184.     208.]
 [   1579.    1720.    1165.  159705.     210.]
 [   1579.    1915.     832.  147537.     194.]
 [   1657.    2812.     721.  149058.     196.]
 [   1696.    2032.     721.  150579.     198.]
 [   1735.    2110.     943.  155142.     204.]
 [   1735.    2383.     943.  156663.     206.]
 [   1735.    2812.     721.  149058.     196.]
 [   1735.    2968.     610.  117117.     154.]
 [   1813.    1564.     277.  158184.     208.]
 [   1813.    1720.     721.  147537.     194.]
 [   1813.    2110.      55.  156663.     206.]
 [   1813.    2422.    1165.  158184.     208.]
 [   1852.    1876.     610.  139932.     184.]
 [   1891.    1720.    1054.  164268.     216.]
 [   1891.    2110.     166.  135369.     178.]
 [   1891.    2539.    1054.  162747.     214.]
 [   1891.    2734.     832.  147537.     194.]
 [   1930.    2110.     832.  147537.     194.]
 [   1930.    2344.     277.  156663.     206.]
 [   1969.    2305.     277.  156660.     206.]
 [   2008.    2188.     388.  149058.     196.]
 [   2047.    1681.    1054.  164268.     216.]
 [   2047.    2188.     388.  149058.     196.]
 [   2125.    2188.     943.  156663.     206.]
 [   2164.    2032.    1054.  162747.     214.]
 [   2164.    2266.     943.  156663.     206.]
 [   2164.    2656.     943.  156663.     206.]
 [   2203.    1798.     277.  155142.     204.]
 [   2203.    2032.     832.  147537.     194.]
 [   2203.    2695.     721.  149058.     196.]
 [   2203.    2773.     721.  149058.     196.]
 [   2242.    1759.     721.  146016.     192.]
 [   2242.    1798.    1054.  164268.     216.]
 [   2242.    2110.     610.  135369.     178.]
 [   2242.    2227.    1054.  162733.     214.]
 [   2242.    2383.    1054.  162747.     214.]
 [   2281.    1759.     388.  146791.     193.]
 [   2281.    2656.     166.  135369.     178.]
 [   2359.    1603.     610.  142974.     188.]
 [   2359.    2734.     610.  121672.     160.]
 [   2398.    1759.    1054.  164268.     216.]
 [   2398.    1798.    1054.  164268.     216.]
 [   2398.    2227.     832.  147537.     194.]
 [   2437.    2266.     388.  147537.     194.]
 [   2437.    2305.     721.  149054.     196.]
 [   2476.    2188.     721.  147537.     194.]
 [   2515.    1837.     388.  149058.     196.]
 [   2515.    2929.     499.  155142.     204.]
 [   2554.    1759.    1165.  158184.     208.]
 [   2554.    1915.     277.  156663.     206.]
 [   2554.    1915.     388.  149050.     196.]
 [   2554.    2773.     277.  153621.     202.]
 [   2593.    1837.     943.  156663.     206.]
 [   2593.    2188.    1054.  161226.     212.]
 [   2632.    1642.     943.  156663.     206.]
 [   2632.    1954.    1165.  158184.     208.]
 [   2671.    1759.     499.  155142.     204.]
 [   2710.    1798.      55.  155142.     204.]
 [   2710.    2695.     388.  146016.     192.]
 [   2710.    2734.     721.  147537.     194.]
 [   2749.    2110.     277.  156663.     206.]
 [   2749.    2188.     277.  155146.     204.]
 [   2749.    2344.     166.  136890.     180.]
 [   2788.    1954.     166.  138411.     182.]
 [   2788.    1954.     388.  147537.     194.]
 [   2788.    2071.     277.  156663.     206.]
 [   2788.    2110.     388.  147537.     194.]
 [   2866.    2383.     721.  149058.     196.]
 [   2866.    2617.     166.  136882.     180.]
 [   2905.    2227.     610.  130806.     172.]
 [   2944.    2383.     499.  156663.     206.]
 [   3061.    2578.     166.  136890.     180.]
 [   3139.    1798.      55.  153621.     202.]
 [   3139.    2032.    1165.  158184.     208.]
 [   3178.    2695.     610.  123201.     162.]
 [   3217.    2032.    1165.  158184.     208.]
 [   3217.    2461.     166.  136890.     180.]
 [   3217.    2773.     499.  155142.     204.]
 [   3256.    2461.      55.  149813.     197.]
 [   3373.    2110.    1165.  158182.     208.]
 [   3373.    2383.     721.  147537.     194.]
 [   3412.    2656.     721.  147537.     194.]
 [   3451.    3007.    1054.  155142.     204.]]

In [95]:
import math
fig, ax = plt.subplots(1,2,sharey = True, figsize=(20,5))
weights = np.ones_like(spike_thres[:,3])/len(spike_thres[:,3])
weights2 = np.ones_like(data_thresholded[:,3])/len(data_thresholded[:,3])

ax[0].hist(data_thresholded[:,3], bins = 100, alpha = 0.5, weights = weights2, label = 'all data')
ax[0].hist(spike_thres[:,3], bins = 100, alpha = 0.5, weights = weights, label = 'spike')
ax[0].legend(loc='upper right')
ax[0].set_title('Histogram of Unmasked values in the Spike vs All Data')

weights = np.ones_like(spike_thres[:,4])/len(spike_thres[:,4])
weights2 = np.ones_like(data_thresholded[:,4])/len(data_thresholded[:,4])

ax[1].hist(data_thresholded[:,4], bins = 100, alpha = 0.5, weights = weights2, label = 'all data')
ax[1].hist(spike_thres[:,4], bins = 100, alpha = 0.5, weights = weights, label = 'spike')
ax[1].legend(loc='upper right')
ax[1].set_title('Histogram of Synapses in the Spike vs All Data')

plt.show()


5) Boxplot of different clusters by coordinates and densities

Cluster 4 has relatively high density


In [48]:
import sklearn.mixture as mixture

n_clusters = 4
gmm = mixture.GMM(n_components=n_clusters, n_iter=1000, covariance_type='diag')
labels = gmm.fit_predict(syn_unmasked)
clusters = []
for l in range(n_clusters):
    a = np.where(labels == l)
    clusters.append(syn_unmasked[a,:])

print len(clusters)
print clusters[0].shape

counter = 0
indx = 0
indy = 0
for cluster in clusters:
    s = cluster.shape
    cluster = cluster.reshape((s[1], s[2]))
    counter += 1
    print 
    print'Working on cluster: ' + str(counter)
    plt.boxplot(cluster[:,-1], 0, 'gD', showmeans=True)
    plt.xticks([1])
    plt.ylabel('Density')
    plt.title('Boxplot of density \n at cluster = ' + str(int(counter)))
    plt.show()
    
 
    print "Done with cluster"
plt.show()


4
(1L, 10509L, 4L)

Working on cluster: 1
Done with cluster

Working on cluster: 2
Done with cluster

Working on cluster: 3
Done with cluster

Working on cluster: 4
Done with cluster

5 OLD ) Boxplot distrubutions of each Z layer


In [47]:
data_uniques, UIndex, UCounts = np.unique(syn_unmasked[:,2], return_index = True, return_counts = True)
'''
print 'uniques'
print 'index: ' + str(UIndex)
print 'counts: ' + str(UCounts)
print 'values: ' + str(data_uniques)
'''
fig, ax = plt.subplots(3,4,figsize=(10,20))
counter = 0

for i in np.unique(syn_unmasked[:,2]):
    # print 'calcuating for z: ' + str(int(i))
    
    def check_z(row):
        if row[2] == i:
            return True
        return False
   
    counter += 1
    xind = (counter%3) - 1
    yind = (counter%4) - 1
     
    index_true = np.where(np.apply_along_axis(check_z, 1, syn_unmasked))
    syn_uniqueZ = syn_unmasked[index_true]
    
    ax[xind,yind].boxplot(syn_uniqueZ[:,3], 0, 'gD')
    ax[xind,yind].set_xticks([1], i)
    ax[xind,yind].set_ylabel('Density')
    ax[xind,yind].set_title('Boxplot at \n z = ' + str(int(i)))

#print 'yind = %d, xind = %d' %(yind,xind)
#print i

ax[xind+1,yind+1].boxplot(syn_uniqueZ[:,3], 0, 'gD',showmeans=True)
ax[xind+1,yind+1].set_xticks([1], 'set')
ax[xind+1,yind+1].set_ylabel('Density')
ax[xind+1,yind+1].set_title('Boxplot for \n All Densities')

print "Density Distrubtion Boxplots:"
plt.tight_layout()

plt.show()


Density Distrubtion Boxplots:

In [ ]:


In [ ]: