In [33]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import scipy.stats as ss
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.optimize import curve_fit
from pylab import *
import itertools
from lmfit import Model
np.random.seed(12345678) # for reproducibility, set random seed
# Read in data
df = pd.read_csv('../output.csv')
nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox
xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()
# Get rid of the blank edges
bottom = 0;
top = len(xvals);
right = 0;
left = len(yvals);
for z in zvals:
this_z = df[df['cz']==z]
# X direction
xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
bottom = max(bottom, np.argmax(xhist>0.5))
top = min(top, len(xvals)-np.argmax(xhist[::-1]>0.5))
# Y direction
yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
right = max(right, np.argmax(yhist>0.5))
left = min(left, len(yvals)-np.argmax(yhist[::-1]>0.5))
# Copy new dataset without edges
df2 = df.copy()
for z in zvals:
df2.drop(df2.index[(df2['cx']<xvals[bottom]) | (df2['cx']>=xvals[top])], inplace=True)
df2.drop(df2.index[(df2['cy']<yvals[right]) | (df2['cy']>=yvals[left])], inplace=True)
xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()
df2.head()
Out[33]:
In [2]:
dfxy = pd.pivot_table(df2, index='cy', columns='cx', values='weighted', aggfunc=np.sum)
df3 = dfxy.unstack().rename('weighted').reset_index()
df3.head()
Out[2]:
In [3]:
from numpy.polynomial import polynomial as P
def polyfit2d(x, y, f, deg):
x = np.asarray(x)
y = np.asarray(y)
f = np.asarray(f)
deg = np.asarray(deg)
vander = P.polyvander2d(x, y, deg)
vander = vander.reshape((-1,vander.shape[-1]))
f = f.reshape((vander.shape[0],))
c = np.linalg.lstsq(vander, f)[0]
return c.reshape(deg+1)
x_point = np.array(df3['cx'])
y_point = np.array(df3['cy'])
synapses = np.array(df3['weighted'])
for deg in range(5):
popt = polyfit2d(x_point, y_point, synapses, deg=[deg, deg])
chi_squared = np.sum((P.polyval2d(x_point, y_point, popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing polynomial model of degree =', deg
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print
popt = polyfit2d(np.log(x_point), np.log(y_point), synapses, deg=[1, 1])
chi_squared = np.sum((P.polyval2d(np.log(x_point), np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print
popt = polyfit2d(np.log(x_point), np.log(y_point), np.log(synapses), deg=[1, 1])
chi_squared = np.sum((np.exp(P.polyval2d(np.log(x_point), np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print
print '2D polynomial of degree = 2 gives the best fit'
In [4]:
popt = polyfit2d(x_point, y_point, synapses, deg=[2, 2])
pred = P.polyval2d(x_point, y_point, popt)
df3['pred'] = pred
sns.set_style('white')
fs = 18
tfs = 14
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['weighted'],
s = df3['weighted']/100, c = df3['weighted'], cmap = 'winter',
edgecolors = [0,0,0,0])
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
X, Y = np.meshgrid(xvals, yvals)
Z = pd.pivot_table(df3, index='cy', columns='cx', values='pred')
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
linewidth=0, antialiased=False)
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
linewidth=0, antialiased=False)
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['weighted'],
s = df3['weighted']/100, c = df3['weighted'], cmap = 'winter',
edgecolors = [0,0,0,0])
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual and Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
In [5]:
residual = abs(pred - df3['weighted'])
sns.set_style('white')
fs = 18
tfs = 14
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = residual,
s = residual/10, c = residual, cmap = 'winter',
edgecolors = [0,0,0,0])
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Difference of Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
In [6]:
mean = df2['weighted'].mean()
ymeans = [df2[df2['cy']==y]['weighted'].mean() for y in yvals]
df4 = df2.copy()
for y, ymean in zip(yvals, ymeans):
df4.loc[df4['cy']==y, 'weighted'] = df4.loc[df4['cy']==y, 'weighted'] - (ymean - mean)
df4.head()
Out[6]:
In [7]:
fs = 18
bottom = df4['weighted'].min()
top = df4['weighted'].max()
sns.set_style('white')
avals_list = [xvals, yvals, zvals]
anames = ['cx', 'cy', 'cz']
alabels = ['X (um)', 'Y (um)', 'Z (um)']
ascales = [2, 2, 1]
for avals, aname, alabel, ascale in zip(avals_list, anames, alabels, ascales):
delta = 2*(avals[1]-avals[0])
extra = delta / (np.sqrt(3)/2)
left = np.min(avals) - extra
right = np.max(avals) + extra
with sns.plotting_context('notebook', font_scale=1.5):
g = sns.jointplot(x=aname, y='weighted', data = df4, kind='hex',
joint_kws={'gridsize':len(avals)/ascale+1, 'extent':(left, right, bottom, top)},
marginal_kws={'bins':len(avals)/ascale})
g = g.plot_joint(sns.regplot, scatter=False, color='pink', line_kws={'linewidth': 4})
sns.axlabel(alabel, 'Weighted synapses', fontsize=fs)
In [8]:
dfxy = pd.pivot_table(df4, index='cy', columns='cx', values='weighted', aggfunc=np.sum)
df5 = dfxy.unstack().rename('weighted').reset_index()
df5.head()
Out[8]:
In [9]:
from numpy.polynomial import polynomial as P
def polyfit2d(x, y, f, deg):
x = np.asarray(x)
y = np.asarray(y)
f = np.asarray(f)
deg = np.asarray(deg)
vander = P.polyvander2d(x, y, deg)
vander = vander.reshape((-1,vander.shape[-1]))
f = f.reshape((vander.shape[0],))
c = np.linalg.lstsq(vander, f)[0]
return c.reshape(deg+1)
x_point = np.array(df5['cx'])
y_point = np.array(df5['cy'])
synapses = np.array(df5['weighted'])
for deg in range(5):
popt = polyfit2d(x_point, y_point, synapses, deg=[deg, deg])
chi_squared = np.sum((P.polyval2d(x_point, y_point, popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing polynomial model of degree =', deg
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print
popt = polyfit2d(np.log(x_point), np.log(y_point), synapses, deg=[1, 1])
chi_squared = np.sum((P.polyval2d(np.log(x_point), np.log(y_point), popt) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing logarithmic model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print
popt = polyfit2d(np.log(x_point), np.log(y_point), np.log(synapses), deg=[1, 1])
chi_squared = np.sum((np.exp(P.polyval2d(np.log(x_point), np.log(y_point), popt)) - synapses) ** 2)
reduced_chi_squared = chi_squared / (len(x_point) + len(y_point) - len(popt))
print 'Testing powerlaw model'
print 'The degrees of freedom for this test is', len(x_point) + len(y_point) - len(popt)
print 'The chi squared value is: ', ("%.2f" % chi_squared)
print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
print
print '2D polynomial of degree = 2 gives the best fit'
In [10]:
popt = polyfit2d(x_point, y_point, synapses, deg=[2, 2])
pred = P.polyval2d(x_point, y_point, popt)
df5['pred'] = pred
sns.set_style('white')
fs = 18
tfs = 14
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = df5['weighted'],
s = df5['weighted']/100, c = df5['weighted'], cmap = 'winter',
edgecolors = [0,0,0,0])
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
X, Y = np.meshgrid(xvals, yvals)
Z = pd.pivot_table(df5, index='cy', columns='cx', values='pred')
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
linewidth=0, antialiased=False)
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap = 'winter',
linewidth=0, antialiased=False)
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = df5['weighted'],
s = df5['weighted']/100, c = df5['weighted'], cmap = 'winter',
edgecolors = [0,0,0,0])
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(df2['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(df2['weighted'].unique()[::3])
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Actual and Fitted Distribution of Synapses in X-Y plane", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
In [11]:
residual = abs(pred - df5['weighted'])
sns.set_style('white')
fs = 18
tfs = 14
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = df5['cx'], ys = df5['cy'], zs = residual,
s = residual/10, c = residual, cmap = 'winter',
edgecolors = [0,0,0,0])
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title("Difference of Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()
In [10]:
X = df2.loc[:,('cx','cy','cz')]
Xnew = X.copy()
for i, x in enumerate(xvals):
Xnew.loc[Xnew['cx']==x, 'cx'] = i
for j, y in enumerate(yvals):
Xnew.loc[Xnew['cy']==y, 'cy'] = j
for k, z in enumerate(zvals):
Xnew.loc[Xnew['cz']==z, 'cz'] = k
Xnew.columns = ['i', 'j', 'k']
Xnew.head()
Out[10]:
In [29]:
from sklearn.cluster import DBSCAN
# Compute DBSCAN
db = DBSCAN(eps=1, min_samples=2000).fit(Xnew, sample_weight=df2['weighted'])
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
In [47]:
# Plot result
sns.set_style('white')
fs = 18
tfs = 14
# Black removed and is used for noise instead.
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
class_member_mask = (labels == k)
xyz = X[class_member_mask & core_samples_mask]
ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col)
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title('Estimated number of clusters: %d' % n_clusters_, fontsize=fs)
# plt.colorbar(patches)
plt.tight_layout()
plt.show()
In [60]:
# Plot result
sns.set_style('white')
fs = 18
tfs = 14
# Black removed and is used for noise instead.
ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
print "Cluster sizes:"
for k, col in zip(unique_labels, colors):
if k == -1:
# Black used for noise.
col = 'k'
class_member_mask = (labels == k)
xyz = X[class_member_mask & core_samples_mask]
if len(xyz)>1000:
print "group", k, ":", np.sum(class_member_mask)
ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col)
xyz = X[class_member_mask & ~core_samples_mask]
ax.scatter(xyz['cx'], xyz['cy'], xyz['cz'], c=col, s=6)
plt.xticks(xvals[::20], fontsize=tfs)
plt.yticks(yvals[::10], fontsize=tfs)
#plt.zticks(dfout['weighted'].unique()[0, 1000, 2000, 3000], fontsize=tfs)
#ax.zaxis.set_ticks(dfout['weighted'].unique()[::3])
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title('Large clusters', fontsize=fs)
# plt.colorbar(patches)
plt.tight_layout()
plt.show()
In [69]:
fs = 18
tfs = 14
df2['label'] = labels
for k in [0, 83]:
plt.figure(figsize=(16,8))
print "Group", k
for zi, z in enumerate(zvals):
XY = pd.pivot_table(df2[df2['cz']==z], index='cy', columns='cx', values='weighted')
XYlabel = pd.pivot_table(df2[df2['cz']==z], index='cy', columns='cx', values='label')
XYmask = XYlabel != k
plt.subplot(3,4,1+zi)
sns.heatmap(XY, xticklabels=40, yticklabels=30,
vmin=df2['weighted'].min(), vmax=df2['weighted'].max(), cbar=False, cmap=cm.get_cmap('winter'),
mask=XYmask)
plt.gca().set_title('Z = '+str(z)+' um', fontsize=fs)
plt.gca().set_xlabel('X (um)', fontsize=fs)
plt.gca().set_ylabel('Y (um)', fontsize=fs)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.tight_layout()
plt.show()