In [1]:
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import urllib2
import scipy.stats as stats
np.set_printoptions(precision=3, suppress=True)
url = ('https://raw.githubusercontent.com/Upward-Spiral-Science'
'/data/master/syn-density/output.csv')
data = urllib2.urlopen(url)
csv = np.genfromtxt(data, delimiter=",")[1:] # don't want first row (labels)
# chopping data based on thresholds on x and y coordinates
x_bounds = (409, 3529)
y_bounds = (1564, 3124)
def check_in_bounds(row, x_bounds, y_bounds):
if row[0] < x_bounds[0] or row[0] > x_bounds[1]:
return False
if row[1] < y_bounds[0] or row[1] > y_bounds[1]:
return False
if row[3] == 0:
return False
return True
indices_in_bound, = np.where(np.apply_along_axis(check_in_bounds, 1, csv,
x_bounds, y_bounds))
data_thresholded = csv[indices_in_bound]
n = data_thresholded.shape[0]
def synapses_over_unmasked(row):
s = (row[4]/row[3])*(64**3)
return [row[0], row[1], row[2], s]
syn_unmasked = np.apply_along_axis(synapses_over_unmasked, 1, data_thresholded)
syn_normalized = syn_unmasked
print 'end setup'
In [2]:
import sklearn.mixture as mixture
n_clusters = 4
gmm = mixture.GMM(n_components=n_clusters, n_iter=1000, covariance_type='diag')
labels = gmm.fit_predict(syn_unmasked)
clusters = []
for l in range(n_clusters):
a = np.where(labels == l)
clusters.append(syn_unmasked[a,:])
print len(clusters)
print clusters[0].shape
In [3]:
from mpl_toolkits.mplot3d import Axes3D
counter = 0
for cluster in clusters:
s = cluster.shape
cluster = cluster.reshape((s[1], s[2]))
counter += 1
print
print'Visualization of cluster: ' + str(counter)
X = cluster[:, (0,1,2)] # x,y,z
Y = cluster[:,-1] # syn/unmasked from spike
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.view_init()
ax.dist = 10 # distance
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
title = 'Scatter Plot of cluster #:' + str(counter)
ax.set_title(title)
ax.set_xticks(np.arange(min(cluster[:,0]), max(cluster[:,0])+1, 800))
ax.set_yticks(np.arange(min(cluster[:,1]), max(cluster[:,1])+1, 500))
ax.set_zticks(np.arange(min(cluster[:,2]), max(cluster[:,2])+1, 300))
ax.scatter(
cluster[:, 0], cluster[:, 1], cluster[:, 2],
#c = counter
) # data
# marker colour
plt.show()
In [5]:
counter = 0
fig, axs = plt.subplots(1,4,figsize=(25,5))
for cluster in clusters:
s = cluster.shape
cluster = cluster.reshape((s[1], s[2]))
counter += 1
print
print'Working on cluster: ' + str(counter)
weights = np.ones_like(cluster[:,-1])/len(cluster[:,-1])
weights2 = np.ones_like(syn_unmasked[:,-1])/len(syn_unmasked[:,-1])
figure = plt.figure()
axs[counter-1].hist(cluster[:,-1], bins = 100, alpha = 0.5, weights = weights, label = 'Cluster#:'+str(counter))
axs[counter-1].hist(syn_unmasked[:,-1], bins = 100, alpha = 0.5, weights = weights2, label = 'all data')
axs[counter-1].set_title('Relative Frequency of \n Density w/in Cluster#: '+ str(counter))
axs[counter-1].set_xlabel('Density')
axs[counter-1].set_ylabel('Relative Frequency')
axs[counter-1].legend()
print "Done with cluster"
plt.show()
Back to 4 clusters, but similar visually to HW 9
In [6]:
n_clusters = 4
###########################################
gmm = mixture.GMM(n_components=n_clusters, n_iter=1000, covariance_type='diag', random_state=1)
clusters = [[] for i in xrange(n_clusters)]
centroidmatrix = [0]*4
print centroidmatrix
predicted = gmm.fit_predict(syn_unmasked)
for label, row in zip(predicted, syn_unmasked[:,]):
clusters[label].append(row)
for i in xrange(n_clusters):
clusters[i] = np.array(clusters[i])
print "# of samples in cluster %d: %d" % (i+1, len(clusters[i]))
print "centroid: ", np.average(clusters[i], axis=0)
centroidmatrix = np.vstack((centroidmatrix,np.average(clusters[i], axis=0)))
plt.show
centroidmatrix = np.delete(centroidmatrix,0,0)
print centroidmatrix
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.scatter(
centroidmatrix[:, 0], centroidmatrix[:, 1], centroidmatrix[:, 2], # data
c='blue', # marker colour
marker='o', # marker shape
s=centroidmatrix[:,3]*10 # marker size
)
ax.set_xlabel('x (voxels)')
ax.set_ylabel('z (voxels)')
ax.set_zlabel('y (voxels)')
plt.title('Location of Centroids \n Scaled by average density w/in Cluster')
plt.show
Out[6]:
In [59]:
fig = plt.figure(figsize=(10, 7))
ax = fig.gca(projection='3d')
ax.scatter(
centroidmatrix[:, 0], centroidmatrix[:, 1], centroidmatrix[:, 2], # data
c='blue', # marker colour
marker='o', # marker shape
s=centroidmatrix[:,3]*10 # marker size
)
ax.set_xlabel('x (voxels)')
ax.set_ylabel('z (voxels)')
ax.set_zlabel('y (voxels)')
plt.title('Location of Centroids \n Scaled by average density w/in Cluster')
#for angle in range(0, 360):
ax.view_init(45, 45)
plt.show()
Fix boxplots from hw 11
In [61]:
counter = 0
indx = 0
indy = 0
boxclusters = []
data = []
for cluster in clusters:
s = cluster.shape
print s
#cluster = cluster.reshape((s[1], s[2]))
counter += 1
print
print'Working on cluster: ' + str(counter)
plt.boxplot(cluster[:,-1], 0, 'gD', showmeans=True)
plt.xticks([1])
plt.ylabel('Density')
plt.title('Distribution of density \n w/in cluster = ' + str(int(counter)))
plt.show()
# boxcluster = np.append(boxclusters, cluster[:,-1], 0)
print "Done with cluster"
'''
data = (boxcluster[:,0],boxcluster[:,1],boxcluster[:,2],boxcluster[:,3])
plt.figure
plt.boxplot(data)
plt.show()
'''
Out[61]:
In [ ]: