Assignment 12


In [1]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import scipy.stats as ss
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.optimize import curve_fit
import itertools
from lmfit import Model

np.random.seed(12345678)  # for reproducibility, set random seed

# Read in data
df = pd.read_csv('../output.csv')

nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox

xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()

# Get rid of the blank edges: for each Z-slice, find the first and last bins
# whose mean unmasked fraction exceeds 0.5, and keep only the interior
bottom = 0
top = len(xvals)
right = 0
left = len(yvals)
for z in zvals:
    this_z = df[df['cz']==z]
    
    # X direction
    xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
    
    bottom = max(bottom, np.argmax(xhist>0.5))
    top = min(top, len(xvals)-np.argmax(xhist[::-1]>0.5))
    
    # Y direction
    yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
    
    right = max(right, np.argmax(yhist>0.5))
    left = min(left, len(yvals)-np.argmax(yhist[::-1]>0.5))

# Copy new dataset without the blank edges
# (the drops don't depend on z, so no loop over zvals is needed)
df2 = df.copy()
df2.drop(df2.index[(df2['cx']<xvals[bottom]) | (df2['cx']>=xvals[top])], inplace=True)
df2.drop(df2.index[(df2['cy']<yvals[right]) | (df2['cy']>=yvals[left])], inplace=True)

xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()

df2.head()


Out[1]:
cx cy cz unmasked synapses weighted
6292 448 1369 55 126357 153 238.063772
6293 448 1369 166 139932 207 290.840237
6294 448 1369 277 150269 194 253.824488
6295 448 1369 388 138071 159 226.410122
6296 448 1369 499 150842 258 336.278119
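
As a quick sanity check on the weighting, the first row of the table can be reproduced by hand: weighted = synapses / unmasked * nvox rescales the raw synapse count to what it would be in a fully unmasked bin. A minimal check using the values from the first row above:

# Reproduce the first row's 'weighted' value (numbers taken from Out[1])
nvox = 64 * 64 * 48               # 196608 voxels per bin
syn, unm = 153, 126357
print(float(syn) / unm * nvox)    # ~238.06, matching the 'weighted' column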

Simulate null data for correlation matrix


In [2]:
out = pd.pivot_table(df2, index=('cx','cy'), columns='cz', values='weighted')

mean = df2['weighted'].mean()
std = df2['weighted'].std()

nullout = pd.DataFrame(np.random.normal(mean, std, out.shape), index=out.index, columns=out.columns)
nullout.head()


Out[2]:
cz 55 166 277 388 499 610 721 832 943 1054 1165
cx cy
448 1369 282.909329 132.419062 144.755747 128.678597 359.013795 168.591601 277.452126 288.284064 295.514951 309.565319 245.443306
1408 136.867059 229.237705 191.484825 294.739551 305.453857 254.711574 321.432070 279.385621 294.856040 291.717984 319.917457
1447 238.740409 245.685176 385.094346 321.635951 305.627830 292.217420 153.078339 172.087045 223.989824 168.171703 337.086460
1486 325.164232 321.013791 321.121023 339.646971 293.453119 369.907751 269.788765 201.602479 257.147108 304.713402 348.893949
1525 298.292937 319.632342 375.645847 211.721892 290.743462 212.433465 360.854079 251.554855 174.391271 239.195915 328.032179

In [9]:
corr = np.corrcoef(nullout)

fs = 18
lfs = 14

plt.figure()
sns.heatmap(corr, square=True, xticklabels=1000, yticklabels=1000)
plt.xlabel('Bin index', fontsize=fs)
plt.ylabel('Bin index', fontsize=fs)
plt.title('Correlation coefficients', fontsize=fs)
plt.tick_params(axis='both', which='major', labelsize=lfs)
cax = plt.gcf().axes[-1]
cax.tick_params(labelsize=lfs)
plt.savefig('./figs/weighted_synapses/bin_correlations_null.png', format='png', dpi=300)
plt.show()



In [4]:
diag = corr.diagonal() * np.eye(corr.shape[0])
hollow = corr - diag
d_det = np.linalg.det(diag)
h_det = np.linalg.det(hollow)

print "Ratio of on- and off-diagonal determinants: " + str(d_det / h_det)


Ratio of on- and off-diagonal determinants: 9.50970723979e-26

There are many large off-diagonal correlations even though the rows were generated independently. With only 11 samples per row (one per Z-slice), the sample correlation estimates are very noisy, so this is not a good test for independence.
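
A quick simulation (a minimal sketch, independent of the data above) illustrates the sample-size effect: for independent rows of length n, the off-diagonal sample correlations scatter around zero with standard deviation of roughly 1/sqrt(n), so n = 11 produces large spurious correlations while n in the thousands does not.

# Minimal sketch: spread of off-diagonal sample correlations vs. samples per row
for n in [11, 3192]:
    sim = np.random.normal(size=(100, n))    # 100 independent rows, n samples each
    c = np.corrcoef(sim)
    offdiag = c[~np.eye(c.shape[0], dtype=bool)]
    print('n = %d: off-diagonal std = %.3f (expect ~%.3f)'
          % (n, offdiag.std(), 1 / np.sqrt(n)))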


In [8]:
corr2 = np.corrcoef(nullout.T)

fs = 18
lfs = 14

plt.figure()
sns.heatmap(corr2, square=True, xticklabels=1000, yticklabels=1000)
plt.xlabel('Bin index', fontsize=fs)
plt.ylabel('Bin index', fontsize=fs)
plt.title('Correlation coefficients', fontsize=fs)
plt.tick_params(axis='both', which='major', labelsize=lfs)
cax = plt.gcf().axes[-1]
cax.tick_params(labelsize=lfs)
plt.savefig('./figs/weighted_synapses/Z_correlations_null.png', format='png', dpi=300)
plt.show()



In [5]:
diag = corr2.diagonal() * np.eye(corr2.shape[0])
hollow = corr2 - diag
d_det = np.linalg.det(diag)
h_det = np.linalg.det(hollow)

print "Ratio of on- and off-diagonal determinants: " + str(d_det / h_det)


Ratio of on- and off-diagonal determinants: 2.11931681096e+15

In [15]:
fs = 18

bottom = df2['weighted'].min()
top = df2['weighted'].max()

sns.set_style('white')

avals_list = [xvals, yvals, zvals]
anames = ['cx', 'cy', 'cz']
alabels = ['X (um)', 'Y (um)', 'Z (um)']
titles = ['X', 'Y', 'Z']
ascales = [2, 2, 1]

for avals, aname, alabel, ascale, title in zip(avals_list, anames, alabels, ascales, titles):
    delta = 2*(avals[1]-avals[0])
    extra = delta / (np.sqrt(3)/2)
    left = np.min(avals) - extra
    right = np.max(avals) + extra

    with sns.plotting_context('notebook', font_scale=1.5):
        g = sns.jointplot(x=aname, y='weighted', data = df2, kind='hex',
                          joint_kws={'gridsize':len(avals)/ascale+1, 'extent':(left, right, bottom, top)},
                          marginal_kws={'bins':len(avals)/ascale})
        g = g.plot_joint(sns.regplot, scatter=False, color='pink', line_kws={'linewidth': 4})
        sns.axlabel(alabel, 'Weighted synapses', fontsize=fs)
        plt.gcf().axes[1].set_title('Distribution of weighted synapses along '+title)
        plt.savefig('./figs/weighted_synapses/'+title+'_jointplot.png', format='png', dpi=300)
        plt.show()



In [6]:
fs = 18

sns.set_style('white')

avals_list = [xvals, yvals, zvals]
anames = ['cx', 'cy', 'cz']
alabels = ['X (um)', 'Y (um)', 'Z (um)']

for avals, aname, alabel in zip(avals_list, anames, alabels):
    delta = 2*(avals[1]-avals[0])
    extra = delta / (np.sqrt(3)/2)
    left = np.min(avals) - extra
    right = np.max(avals) + extra

    with sns.plotting_context('notebook', font_scale=1.5):
        g = sns.jointplot(x=aname, y='weighted', data = df2, kind='reg',
                          scatter_kws={'alpha':0.03}, 
                          line_kws={'color':'pink'}, 
                          marginal_kws={'bins':len(avals)})
        plt.xlim([left, right])
        sns.axlabel(alabel, 'Weighted synapses', fontsize=fs)
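
The regression trends above can also be quantified directly; a minimal check using scipy.stats (imported above as ss) computes the Pearson correlation between each coordinate and the weighted synapse count:

# Pearson correlation of weighted synapses with each coordinate
for aname in ['cx', 'cy', 'cz']:
    r, p = ss.pearsonr(df2[aname], df2['weighted'])
    print('%s vs weighted: r = %.3f, p = %.3g' % (aname, r, p))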


Plot distribution of clustered groups


In [11]:
from sklearn.cluster import KMeans

optn = 6

est = KMeans(n_clusters = optn)
est.fit(df2['weighted'].values.reshape(-1,1))
labels = est.labels_
df2['label'] = labels

# Sort groups by means
groups = np.sort(df2['label'].unique())

means = np.array([df2.loc[df2['label']==label,'weighted'].mean() for label in groups])
reindex = np.argsort(means)

new_labels = np.array(labels)

for label in groups:
    new_labels[labels==label] = np.argmax(reindex==label)

df2['label'] = new_labels

means = np.array([df2.loc[df2['label']==label,'weighted'].mean() for label in groups])
print means
counts = np.array([df2.loc[df2['label']==label,'weighted'].count() for label in groups])
print counts

df2.head()


[  81.26724863  157.29704553  215.56281452  262.02986018  309.17641811
  370.52532023]
[2641 4484 8337 9968 7109 2573]
Out[11]:
cx cy cz unmasked synapses weighted label
6292 448 1369 55 126357 153 238.063772 2
6293 448 1369 166 139932 207 290.840237 4
6294 448 1369 277 150269 194 253.824488 3
6295 448 1369 388 138071 159 226.410122 2
6296 448 1369 499 150842 258 336.278119 4
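
The choice optn = 6 is taken as given here; a minimal elbow-style check (an assumption, not part of the original analysis) would sweep the cluster count and compare the K-means inertia (within-cluster sum of squares), looking for the point of diminishing returns:

# Hypothetical elbow check for the number of clusters
X = df2['weighted'].values.reshape(-1, 1)
for k in range(2, 10):
    km = KMeans(n_clusters=k).fit(X)
    print('k = %d: inertia = %.0f' % (k, km.inertia_))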

In [12]:
# Plot distributions of our dataset
from scipy.stats import norm

priors = np.array([np.float(count)/np.sum(counts) for count in counts])
# print groups, priors

sns.set_style('darkgrid')

_, bin_edges = np.histogram(df2['weighted'], bins=50)

fs = 18

with sns.plotting_context('notebook', font_scale=1.5):
    plt.figure()
    for label, prior in zip(groups, priors):
        g = sns.distplot(df2[df2['label']==label]['weighted'], bins=bin_edges, kde=False, fit=norm, 
                         label=str(label), hist_kws={'edgecolor': 'none'})

    # weight fit by priors
    for label, prior in zip(groups, priors):
        fit = (plt.gca().lines[label].get_data()[0], prior*plt.gca().lines[label].get_data()[1])
        plt.gca().lines[label].set_data(fit)

    # weight histograms by priors
    scale = -1
    for i in plt.gca().patches:
        lstr = i.get_label()
        if len(lstr)==1:
            label = int(i.get_label())
            scale = priors[label]
        if scale != -1:
            h = i.get_height()
            i.set_height(scale*h)

    plt.gca().set_ylim(0,0.009)
    plt.xlabel('Weighted synapses', fontsize=fs)
    plt.ylabel('Probability', fontsize=fs)
    plt.title('PDF', fontsize=fs)
    plt.legend()
    plt.xticks(np.arange(0,700,200))
    plt.yticks(np.arange(0,0.01,0.003))
    plt.savefig('./figs/weighted_synapses/K-means_group_PDF.png', format='png', dpi=300)
    plt.show()
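
Weighting the per-group normal fits by their priors, as done above, amounts to a hand-rolled Gaussian mixture; a minimal alternative sketch (using sklearn.mixture, which the original analysis does not use) fits the mixture directly and returns comparable means and mixing proportions:

# Hypothetical alternative: fit a 6-component 1-D Gaussian mixture directly
from sklearn.mixture import GaussianMixture   # called GMM in older sklearn releases
gmm = GaussianMixture(n_components=6).fit(df2['weighted'].values.reshape(-1, 1))
print(np.sort(gmm.means_.ravel()))   # component means, cf. the K-means group means
print(gmm.weights_)                  # mixing proportions, cf. the priors above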



In [9]:
from matplotlib import ticker

fs = 18
tfs = 14

with sns.plotting_context('notebook', font_scale=1.5):
    g = sns.PairGrid(df2, hue='label', vars=['weighted'])
    g = g.map_diag(plt.hist, bins=50)
#     g = g.map_offdiag(plt.scatter)
    plt.gcf().axes[0].set_xlabel('Weighted synapses', fontsize=fs)
    plt.gcf().axes[0].set_ylabel('Count (normalized)', fontsize=fs)
    
    for ax in g.axes.flat:
        ax.xaxis.set_major_locator(ticker.MaxNLocator(4, prune="both"))
        ax.yaxis.set_major_locator(ticker.MaxNLocator(4, prune="both"))

        ax.tick_params(axis='both', which='major', labelsize=tfs)
    
    plt.gcf().set_size_inches(10,6)
    plt.title('Distribution of weighted synapses')
    plt.show()


Plot X-Y Plane


In [10]:
xval = df2['cx'].unique()
yval = df2['cy'].unique()
zval = df2['cz'].unique()
# Sum 'weighted' over z for each (cx, cy) bin
rows = []
for i in xval:
    for j in yval:
        val = df2[(df2['cx'] == i) & (df2['cy'] == j)]['weighted'].sum()
        rows.append([i, j, val])
dfout = pd.DataFrame(rows, columns=['cx', 'cy', 'weighted'])
print len(dfout['weighted'])


3192
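
The nested loop above is equivalent to (and much slower than) a pandas groupby; a minimal sketch of the same aggregation, which matches the loop provided every (cx, cy) pair actually occurs in df2:

# Equivalent aggregation: sum 'weighted' over z for each (cx, cy) bin
dfout2 = df2.groupby(['cx', 'cy'], as_index=False)['weighted'].sum()
print(len(dfout2))   # 3192, matching the loop above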

In [11]:
print dfout['weighted'].unique()[::3]


[ 2753.53758224  2656.40184227  3028.22798933 ...,  2365.08558707
  2583.99299544  2404.55947334]

In [12]:
sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = dfout['cx'], ys = dfout['cy'], zs = dfout['weighted'], 
                     s = dfout['weighted']/100, c = dfout['weighted'], cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(dfout['cx'].unique()[::20], fontsize=tfs)
plt.yticks(dfout['cy'].unique()[::10], fontsize=tfs)
ax.view_init(azim = 35)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
plt.title('Distribution of Synapses in the X-Y Plane', fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()


Curve Fitting in the X-Y Plane


In [13]:
x_point = np.array(dfout['cx'])
y_point = np.array(dfout['cy'])
synapses = np.array(dfout['weighted'])

def synFun(x, a, b, c):
    return a * x[0] + b * x[1] + c

popt, pcov = curve_fit(synFun, [x_point, y_point], synapses)

print "Estimated coefficients are: %f, %f, %f" % (popt[0], popt[1], popt[2])


Estimated coefficients are: 0.078664, -0.433073, 3399.517072
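
curve_fit also returns the covariance matrix of the estimates; a minimal sketch of the standard errors (square roots of its diagonal) gives a rough sense of how well each coefficient is determined:

# 1-sigma standard errors of the fitted coefficients
perr = np.sqrt(np.diag(pcov))
for name, val, err in zip(['a', 'b', 'c'], popt, perr):
    print('%s = %f +/- %f' % (name, val, err))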

Try different functional forms and compare chi-squared statistics


In [14]:
def test_linear_model(x, a, b, c):
    return a * x[0] + b * x[1] + c

def test_quadratic_model(x, a, b, c):
    return a * x[0] ** 2 + b * x[1] ** 2 + c

def test_cubic_model(x, a, b, c, d, e):
    return a * x[0] ** 3 + b * x[1] ** 3 + c * x[0] + d * x[1] + e

def test_quartic_model(x, a, b, c, d, e):
    return a * x[0] ** 4 + b * x[1] ** 4 + c * x[0] ** 2 + d * x[1] ** 2 + e

def test_logarithmic_model(x, a, b, c):
    return a * np.log(x[0]) + b * np.log(x[1]) + c

def test_powerlaw_model(x, a, b, c, d):
    # NB: c and d enter only through their product, so they are degenerate
    # (see the C(c, d) = 1.000 correlation in the fit report below)
    return a * (x[0] ** b) + c * (x[1] * d)

def test_interaction_model(x, a, b, c, d):
    return a * x[0] + b * x[1] + c * x[0] * x[1] + d

models = [test_linear_model, test_quadratic_model, test_cubic_model,
          test_quartic_model, test_logarithmic_model, test_powerlaw_model,
          test_interaction_model]

for model in models:
    popt, pcov = curve_fit(model, [x_point, y_point], synapses)
    chi_squared = np.sum((model([x_point, y_point], *popt) - synapses) ** 2)
    # degrees of freedom = number of data points minus number of fitted parameters
    dof = len(synapses) - len(popt)
    reduced_chi_squared = chi_squared / dof
    print 'Testing ' + model.__name__.replace('test_', '')
    print 'The degrees of freedom for this test is', dof
    print 'The chi squared value is: ', ("%.2f" % chi_squared)
    print 'The reduced chi squared value is: ', ("%.2f" % reduced_chi_squared)
    print


Testing linear_model
The degrees of freedom for this test is 3189
The chi squared value is:  369310706.41
The reduced chi squared value is:  115807.68

Testing quadratic_model
The degrees of freedom for this test is 3189
The chi squared value is:  354842768.47
The reduced chi squared value is:  111270.86

Testing cubic_model
The degrees of freedom for this test is 3187
The chi squared value is:  333490887.78
The reduced chi squared value is:  104641.01

Testing quartic_model
The degrees of freedom for this test is 3187
The chi squared value is:  337008269.64
The reduced chi squared value is:  105744.67

Testing logarithmic_model
The degrees of freedom for this test is 3189
The chi squared value is:  386741527.58
The reduced chi squared value is:  121273.61

Testing powerlaw_model
The degrees of freedom for this test is 3188
The chi squared value is:  377808966.68
The reduced chi squared value is:  118509.71

Testing interaction_model
The degrees of freedom for this test is 3188
The chi squared value is:  369231539.88
The reduced chi squared value is:  115819.18
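
For reference, the AIC comparison quoted below can be reproduced from these chi-squared values alone: under a Gaussian error model, AIC = n*ln(chi2/n) + 2k, with n data points and k fitted parameters. A minimal sketch (chi-squared values and parameter counts taken from the cell above):

# AIC from chi-squared: lower is better; the cubic model wins
n = len(synapses)   # 3192 data points
for name, chi2, k in [('linear', 369310706.41, 3), ('quadratic', 354842768.47, 3),
                      ('cubic', 333490887.78, 5), ('quartic', 337008269.64, 5),
                      ('logarithmic', 386741527.58, 3), ('powerlaw', 377808966.68, 4),
                      ('interaction', 369231539.88, 4)]:
    print('%-12s AIC = %.1f' % (name, n * np.log(chi2 / n) + 2 * k))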

Compute Test Statistics for each Model


In [15]:
popt = curve_fit(test_linear_model, [x_point, y_point], synapses)
model_1 = Model(test_linear_model)
result_1 = model_1.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2])

popt = curve_fit(test_quadratic_model, [x_point, y_point], synapses)
model_2 = Model(test_quadratic_model)
result_2 = model_2.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2])

popt = curve_fit(test_cubic_model, [x_point, y_point], synapses)
model_3 = Model(test_cubic_model)
result_3 = model_3.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2], d = popt[0][3], e = popt[0][4])

popt = curve_fit(test_quartic_model, [x_point, y_point], synapses)
model_4 = Model(test_quartic_model)
result_4 = model_4.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2], d = popt[0][3], e = popt[0][4])

popt = curve_fit(test_logarithmic_model, [x_point, y_point], synapses)
model_5 = Model(test_logarithmic_model)
result_5 = model_5.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2])

popt = curve_fit(test_powerlaw_model, [x_point, y_point], synapses)
model_6 = Model(test_powerlaw_model)
result_6 = model_6.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2], d = popt[0][3])

popt = curve_fit(test_interaction_model, [x_point, y_point], synapses)
model_7 = Model(test_interaction_model)
result_7 = model_7.fit(synapses, x = [x_point, y_point], a = popt[0][0], b = popt[0][1], c = popt[0][2], d = popt[0][3])


 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "d"
 - Adding parameter "e"
 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "d"
 - Adding parameter "e"
 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "d"
 - Adding parameter "a"
 - Adding parameter "b"
 - Adding parameter "c"
 - Adding parameter "d"

In [16]:
print result_1.fit_report()
print result_2.fit_report()
print result_3.fit_report()
print result_4.fit_report()
print result_5.fit_report()
print result_6.fit_report()
print result_7.fit_report()
print "According to AIC, the cubic model is the best-fitting of the 7 candidates."


[[Model]]
    Model(test_linear_model)
[[Fit Statistics]]
    # function evals   = 6
    # data points      = 3192
    # variables        = 3
    chi-square         = 369310706.413
    reduced chi-square = 115807.685
[[Variables]]
    a:   0.07866415 +/- 0.006369 (8.10%) (init= 0.07866415)
    b:  -0.43307322 +/- 0.014084 (3.25%) (init=-0.4330732)
    c:   3399.51712 +/- 32.80890 (0.97%) (init= 3399.517)
[[Correlations]] (unreported correlations are <  0.100)
    C(b, c)                      = -0.897 
    C(a, c)                      = -0.401 

[[Model]]
    Model(test_quadratic_model)
[[Fit Statistics]]
    # function evals   = 6
    # data points      = 3192
    # variables        = 3
    chi-square         = 354842768.469
    reduced chi-square = 111270.859
[[Variables]]
    a:   2.2450e-05 +/- 1.48e-06 (6.59%) (init= 2.245049e-05)
    b:  -0.00010618 +/- 3.29e-06 (3.10%) (init=-0.0001061814)
    c:   3024.24155 +/- 17.81681 (0.59%) (init= 3024.242)
[[Correlations]] (unreported correlations are <  0.100)
    C(b, c)                      = -0.840 
    C(a, c)                      = -0.429 

[[Model]]
    Model(test_cubic_model)
[[Fit Statistics]]
    # function evals   = 8
    # data points      = 3192
    # variables        = 5
    chi-square         = 333490887.776
    reduced chi-square = 104641.007
[[Variables]]
    a:   1.6458e-08 +/- 1.14e-09 (6.96%) (init= 1.645834e-08)
    b:  -6.4938e-08 +/- 5.58e-09 (8.59%) (init=-6.493843e-08)
    c:  -0.15867434 +/- 0.017583 (11.08%) (init=-0.1586743)
    d:   0.43966592 +/- 0.076132 (17.32%) (init= 0.4396659)
    e:   2496.79146 +/- 105.6004 (4.23%) (init= 2496.791)
[[Correlations]] (unreported correlations are <  0.100)
    C(b, d)                      = -0.984 
    C(d, e)                      = -0.973 
    C(b, e)                      =  0.941 
    C(a, c)                      = -0.939 
    C(c, e)                      = -0.198 
    C(a, e)                      =  0.167 

[[Model]]
    Model(test_quartic_model)
[[Fit Statistics]]
    # function evals   = 8
    # data points      = 3192
    # variables        = 5
    chi-square         = 337008269.636
    reduced chi-square = 105744.672
[[Variables]]
    a:   4.2599e-12 +/- 4.17e-13 (9.78%) (init= 4.259928e-12)
    b:  -1.6121e-11 +/- 2.01e-12 (12.48%) (init=-1.612119e-11)
    c:  -3.0410e-05 +/- 5.37e-06 (17.66%) (init=-3.04101e-05)
    d:   4.7638e-05 +/- 1.95e-05 (40.86%) (init= 4.763826e-05)
    e:   2801.64831 +/- 43.81474 (1.56%) (init= 2801.648)
[[Correlations]] (unreported correlations are <  0.100)
    C(b, d)                      = -0.986 
    C(a, c)                      = -0.963 
    C(d, e)                      = -0.938 
    C(b, e)                      =  0.895 
    C(c, e)                      = -0.243 
    C(a, e)                      =  0.205 

[[Model]]
    Model(test_logarithmic_model)
[[Fit Statistics]]
    # function evals   = 6
    # data points      = 3192
    # variables        = 3
    chi-square         = 386741527.576
    reduced chi-square = 121273.605
[[Variables]]
    a:   94.8479671 +/- 11.04055 (11.64%) (init= 94.84797)
    b:  -842.312327 +/- 29.21952 (3.47%) (init=-842.3123)
    c:   8366.70576 +/- 237.7225 (2.84%) (init= 8366.706)
[[Correlations]] (unreported correlations are <  0.100)
    C(b, c)                      = -0.937 
    C(a, c)                      = -0.348 

[[Model]]
    Model(test_powerlaw_model)
[[Fit Statistics]]
    # function evals   = 13
    # data points      = 3192
    # variables        = 4
    chi-square         = 377808948.486
    reduced chi-square = 118509.708
[[Variables]]
    a:   2899.41355 +/- 73.75512 (2.54%) (init= 2898.504)
    b:   0.02742195 +/- 0.003107 (11.33%) (init= 0.02745806)
    c:   0.68875461 +/- 1.68e+05 (24398681.26%) (init= 0.6866672)
    d:  -0.62861034 +/- 1.54e+05 (24543475.78%) (init=-0.6304244)
[[Correlations]] (unreported correlations are <  0.100)
    C(c, d)                      =  1.000 
    C(a, b)                      = -0.942 

[[Model]]
    Model(test_interaction_model)
[[Fit Statistics]]
    # function evals   = 7
    # data points      = 3192
    # variables        = 4
    chi-square         = 369231539.878
    reduced chi-square = 115819.178
[[Variables]]
    a:   0.10440719 +/- 0.031782 (30.44%) (init= 0.1044072)
    b:  -0.40762572 +/- 0.033849 (8.30%) (init=-0.4076257)
    c:  -1.2314e-05 +/- 1.49e-05 (120.95%) (init=-1.23143e-05)
    d:   3346.31911 +/- 72.22747 (2.16%) (init= 3346.319)
[[Correlations]] (unreported correlations are <  0.100)
    C(a, c)                      = -0.980 
    C(b, d)                      = -0.980 
    C(a, d)                      = -0.909 
    C(b, c)                      = -0.909 
    C(a, b)                      =  0.891 
    C(c, d)                      =  0.891 

According to AIC, the cubic model is the best-fitting of the 7 candidates.

Plot the fitted and actual synapse distributions


In [17]:
popt, pcov = curve_fit(test_cubic_model, [x_point, y_point], synapses)

# Predictions from the fitted cubic model (defined in the cell above)
pred = test_cubic_model([x_point, y_point], *popt)

series = [dfout['weighted'], pred]
stri = ["Actual Distribution of Synapses in the X-Y Plane", "Fitted Distribution of Synapses in the X-Y Plane"]

for i, k in zip(series, stri):
    sns.set_style('white')

    fs = 18
    tfs = 14

    ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
    patches = ax.scatter(xs = dfout['cx'], ys = dfout['cy'], zs = i, 
                         s = i/100, c = i, cmap = 'winter',
                         edgecolors = [0,0,0,0])

    plt.xticks(dfout['cx'].unique()[::20], fontsize=tfs)
    plt.yticks(dfout['cy'].unique()[::10], fontsize=tfs)
    ax.view_init(azim = 35)
    plt.tick_params(axis='both', which='major', labelsize=tfs)
    plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
    plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
    plt.gca().set_zlabel('Number of Synapses', fontsize = fs, labelpad=20)
    plt.title(k, fontsize=fs)
    plt.colorbar(patches)
    plt.tight_layout()
    plt.show()



In [18]:
residual = abs(pred - dfout['weighted'])

sns.set_style('white')

fs = 18
tfs = 14

ax = plt.figure(figsize = (12, 8)).gca(projection = '3d')
patches = ax.scatter(xs = dfout['cx'], ys = dfout['cy'], zs = residual, 
                     s = residual/10, c = residual, cmap = 'winter',
                     edgecolors = [0,0,0,0])

plt.xticks(dfout['cx'].unique()[::20], fontsize=tfs)
plt.yticks(dfout['cy'].unique()[::10], fontsize=tfs)
ax.view_init(azim = 45)
plt.tick_params(axis='both', which='major', labelsize=tfs)
plt.xlabel('X-coordinate', fontsize = fs, labelpad=20)
plt.ylabel('Y-coordinate', fontsize = fs, labelpad=20)
plt.gca().set_zlabel('Absolute residual', fontsize = fs, labelpad=20)
plt.title("Absolute Difference between Actual and Fitted Number of Synapses", fontsize=fs)
plt.colorbar(patches)
plt.tight_layout()
plt.show()


Separately display the cluster distributions


In [19]:
from sklearn.cluster import KMeans
optn = 6
est = KMeans(n_clusters = optn)
est.fit(df2['weighted'].values.reshape(-1,1))
labels = est.labels_
df2['label'] = labels

# Sort groups by means
groups = np.sort(df2['label'].unique())

means = np.array([df2.loc[df2['label']==label,'weighted'].mean() for label in groups])
print means
reindex = np.argsort(means)
print reindex

new_labels = np.array(labels)

for label in groups:
    new_labels[labels==label] = np.argmax(reindex==label)

df2['label'] = new_labels

means = np.array([df2.loc[df2['label']==label,'weighted'].mean() for label in groups])

df2.head()


[ 155.17545993  259.95359669  307.1369433   213.45950598   80.20085416
  368.73636454]
[4 0 3 1 2 5]
Out[19]:
cx cy cz unmasked synapses weighted label
6292 448 1369 55 126357 153 238.063772 3
6293 448 1369 166 139932 207 290.840237 4
6294 448 1369 277 150269 194 253.824488 3
6295 448 1369 388 138071 159 226.410122 2
6296 448 1369 499 150842 258 336.278119 4
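
The remapping above is easy to mis-read, so here is a standalone toy check (using the reindex = [4 0 3 1 2 5] printed above): each old K-means label is replaced by its rank once the groups are sorted by mean, e.g. old label 4 has the smallest mean (80.2) and becomes new label 0.

# Toy check of the rank remapping
reindex = np.array([4, 0, 3, 1, 2, 5])    # argsort of the printed means
for old in range(6):
    new = np.argmax(reindex == old)       # position of 'old' in reindex = its rank
    print('old label %d -> new label %d' % (old, new))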

In [14]:
for i in range(0, 6):
    df3 = df2[df2['label'] == i]
    
    sns.set_style('white')

    fs = 18
    tfs = 14

    wt_max = 400 
    wt_min = df3['weighted'].min()
    cmap = np.array(sns.color_palette('YlGnBu'))

    colors = [np.append(cmap[i], np.min((1,(wt - wt_min)/(wt_max - wt_min)))) for wt in df3['weighted']]

    ax = plt.figure(figsize = (12, 10)).gca(projection = '3d')
    patches = ax.scatter(xs = df3['cx'], ys = df3['cy'], zs = df3['cz'], 
                         s = 20, c = colors, edgecolors = [0,0,0,0], alpha = 0.5)
    plt.xticks(xvals[::20], fontsize = tfs)
    plt.yticks(yvals[::10], fontsize = tfs)
    ax.zaxis.set_ticks(zvals[::3])
    plt.tick_params(axis = 'both', which='major', labelsize = tfs)
    plt.xlabel('X (um)', fontsize = fs, labelpad = 20)
    plt.ylabel('Y (um)', fontsize = fs, labelpad = 20)
    plt.gca().set_zlabel('Z (um)', fontsize = fs, labelpad = 20)
    plt.title('K-means clustering of weighted synapses, Group %d' % (i), fontsize = fs)
    plt.tight_layout()
    plt.savefig('./figs/weighted_synapses/K-means_group_'+str(i)+'.png', format='png', dpi=300)
    plt.show()