In [1]:
drive_path = 'c:/'
import numpy as np
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
from scipy.stats import ks_2samp
from scipy.stats import anderson_ksamp
from scipy.stats import kruskal
from scipy.stats import variation
%matplotlib inline
import seaborn as sns
from scipy.stats import zscore
from scipy.stats import nanmean
from scipy.stats import nanstd
from scipy.cluster.hierarchy import dendrogram, linkage

In [2]:
filename='C:\Users\Annie\Documents\Data\Ca_Imaging\Analysis\\Odor_Panel\\Odor_Trials.csv'
trials=pd.read_csv(filename)

In [3]:
z='C:\Users\Annie\Documents\Data\Ca_Imaging\GoodFiles\\fullpeak_WL.csv'
peak=pd.read_csv(z)
s='C:\Users\Annie\Documents\Data\Ca_Imaging\GoodFiles\\fullsuccess_WI.csv'
success=pd.read_csv(s)

Reliability vs Peak


In [4]:
melt_success=pd.melt(success,('Mouse','Group'),var_name='Odor',value_name='Reliability')
melt_peak=pd.melt(peak,('Mouse','Group'),var_name='Odor',value_name='Peak')

In [5]:
peak.head()


Out[5]:
Group Mouse IAA10 IAA01 AP MS10 IAA05 Hexanal01 EB MS01 PA MS05 Blank Hexanone Hexanal10 THA Hexanal05
0 Control G PMT (1)_160321_1 0.099708 0.188597 0.036738 0.150474 0.034294 0.062999 -0.061840 0.015671 0.037389 0.169456 NaN 0.093353 0.069948 0.067358 0.124211
1 Control G PMT (2)_160321_1 0.041676 0.188666 0.073576 0.074001 0.001814 0.014205 0.056893 -0.034636 -0.002271 0.139423 NaN -0.026087 0.036447 0.095266 0.101018
2 Control G PMT (3)_160321_1 0.287615 0.477065 0.058214 0.260675 0.127803 0.129928 0.153817 -0.044179 0.221412 0.095906 NaN 0.022945 0.263380 0.146676 0.293199
3 Control G PMT (4)_160321_1 0.034975 0.248613 0.110235 0.071141 0.104363 0.104142 0.082859 0.146939 0.051386 0.157300 NaN -0.057530 0.037260 0.094015 0.198974
4 Control G PMT (5)_160321_1 0.309290 0.393188 0.116399 0.220858 0.240154 0.315627 0.694684 0.268299 0.203381 0.233311 NaN -0.147715 0.145721 0.069755 0.538062

In [6]:
success_peak=pd.concat([melt_peak,melt_success.Reliability],axis=1)
control=success_peak[success_peak['Group']=='Control']
mint=success_peak[success_peak['Group']=='Mint']
hexanal=success_peak[success_peak['Group']=='Hexanal']

In [7]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
ax=sns.barplot(x='Reliability',y='Peak',hue='Group',data=success_peak, palette=['black',sns.xkcd_rgb['azure'],sns.xkcd_rgb['grey']],hue_order=['Control','Hexanal','Mint']);

ax.set(xlabel='Reliability', ylabel='Peak')
ax.set_xticklabels([0,0.25,0.33,0.5,0.66,0.75,1.0])
sns.despine(left=True,bottom=True);
plt.tight_layout();

plt.savefig("C:\Users\Annie\Desktop\Plasticity\Figures\Excitatory\Success_Peak.pdf")



In [42]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
sns.barplot(x='Reliability',y='Peak',data=hexanal);
# plt.ylim(0,5)
plt.tight_layout();



In [41]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
sns.barplot(x='Reliability',y='Peak',data=mint);
plt.xl
# plt.ylim(0,5)
plt.tight_layout();



In [33]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
sns.jointplot('Reliability','Peak',mint).set_axis_labels("Reliability", "Peak");
# sns.jointplot('Reliability','Peak',hexanal).set_axis_labels('Reliability','Peak');
sns.despine(left=True,bottom=True);
plt.ylim(0,5)
plt.tight_layout();



In [29]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
sns.jointplot('Reliability','Peak',hexanal).set_axis_labels('Reliability','Peak');
sns.despine(left=True,bottom=True);
plt.ylim(0,5)
plt.tight_layout();



In [30]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
sns.jointplot('Reliability','Peak',control).set_axis_labels('Reliability','Peak');
sns.despine(left=True,bottom=True);
plt.ylim(0,5)
plt.tight_layout();



In [40]:
sns.set(style='white',palette="muted", color_codes=True);
sns.set_context("paper",font_scale=2);
sns.jointplot('Peak','Reliability',success_peak).set_axis_labels("Peak DF/F", "Baseline Fluorescence");
sns.despine(left=True,bottom=True);
plt.tight_layout();


KMeans


In [8]:
import numpy as np
from kmodes import kmodes


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-8-9ad5e0d12599> in <module>()
      1 import numpy as np
----> 2 from kmodes import kmodes

ImportError: No module named kmodes

In [9]:
import kmodes


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-9-13e229bcbe6d> in <module>()
----> 1 import kmodes

ImportError: No module named kmodes

In [ ]:
# random categorical data
data = np.random.choice(20, (100, 10))

km = kmodes.KModes(n_clusters=4, init='Huang', n_init=5, verbose=1)

clusters = km.fit_predict(data)

# Print the cluster centroids
print(km.cluster_centroids_)

In [85]:
c=peak[peak.Group=='Control'].ix[:,2:]

In [86]:
k=2
kmeans = cluster.KMeans(n_clusters=k)
kmeans.fit(c)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-86-611175834ee9> in <module>()
      1 k=2
      2 kmeans = cluster.KMeans(n_clusters=k)
----> 3 kmeans.fit(c)

C:\Users\Annie\Anaconda2\lib\site-packages\sklearn\cluster\k_means_.py in fit(self, X, y)
    810         """
    811         random_state = check_random_state(self.random_state)
--> 812         X = self._check_fit_data(X)
    813 
    814         self.cluster_centers_, self.labels_, self.inertia_, self.n_iter_ = \

C:\Users\Annie\Anaconda2\lib\site-packages\sklearn\cluster\k_means_.py in _check_fit_data(self, X)
    784     def _check_fit_data(self, X):
    785         """Verify that the number of samples given is larger than k"""
--> 786         X = check_array(X, accept_sparse='csr', dtype=np.float64)
    787         if X.shape[0] < self.n_clusters:
    788             raise ValueError("n_samples=%d should be >= n_clusters=%d" % (

C:\Users\Annie\Anaconda2\lib\site-packages\sklearn\utils\validation.pyc in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    396                              % (array.ndim, estimator_name))
    397         if force_all_finite:
--> 398             _assert_all_finite(array)
    399 
    400     shape_repr = _shape_repr(array.shape)

C:\Users\Annie\Anaconda2\lib\site-packages\sklearn\utils\validation.pyc in _assert_all_finite(X)
     52             and not np.isfinite(X).all()):
     53         raise ValueError("Input contains NaN, infinity"
---> 54                          " or a value too large for %r." % X.dtype)
     55 
     56 

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

Separating by sex


In [4]:
control=peak[peak['Group']=='Control']
mint=peak[peak['Group']=='Mint']
hexanal=peak[peak['Group']=='Hexanal']

In [16]:
# control.to_csv('C:\Users\Annie\Desktop\\Plasticity\\controlpeaks.csv')
# mint.to_csv('C:\Users\Annie\Desktop\\Plasticity\\mintpeaks.csv')
# hexanal.to_csv('C:\Users\Annie\Desktop\\Plasticity\\hexanalpeaks.csv')

In [5]:
c=pd.read_csv('C:\Users\Annie\Desktop\\Plasticity\\controlpeaks.csv')
m=pd.read_csv('C:\Users\Annie\Desktop\\Plasticity\\mintpeaks.csv')
h=pd.read_csv('C:\Users\Annie\Desktop\\Plasticity\\hexanalpeaks.csv')

In [6]:
c.head()


Out[6]:
Group Mouse Sex IAA10 IAA01 AP MS10 IAA05 Hexanal01 EB MS01 PA MS05 Hexanone Hexanal10 THA Hexanal05
0 Control G PMT (1)_160421_1 F 0.261251 0.220532 0.078283 0.174363 0.078846 0.258040 0.224654 -0.015140 0.124194 0.116465 0.244783 0.258894 -0.002940 0.253119
1 Control G PMT (2)_160421_1 F 0.034870 0.156095 0.002472 0.031428 0.088457 0.003294 -0.003833 -0.070036 0.069198 -0.008144 0.032720 -0.006190 -0.039989 0.122363
2 Control G PMT (3)_160421_1 F 0.267157 0.138539 0.137004 0.170831 0.269743 0.077619 0.172468 0.206637 0.078242 0.055920 0.172709 0.229815 0.006948 0.266373
3 Control G PMT (4)_160421_1 F 0.280240 0.269055 0.088021 0.209082 0.283791 0.371138 0.124611 0.178935 -0.000169 0.165357 0.338062 0.340071 -0.062722 0.347664
4 Control G PMT (5)_160421_1 F 0.700963 0.532985 0.439966 0.464252 0.562895 0.827307 0.742206 0.312638 0.408257 0.426777 0.871130 0.729781 0.219649 0.726061

In [6]:
cmelt=pd.melt(c,('Group','Mouse','Sex'),var_name='Odor',value_name='Peak')
mmelt=pd.melt(m,('Group','Mouse','Sex'),var_name='Odor',value_name='Peak')
hmelt=pd.melt(h,('Group','Mouse','Sex'),var_name='Odor',value_name='Peak')

In [13]:
cexcite=cmelt[cmelt.Peak>0]
mexcite=mmelt[mmelt.Peak>0]
hexcite=hmelt[hmelt.Peak>0]

In [32]:
allexcite=pd.concat([cexcite, mexcite, hexcite])
allexcitem=allexcite[allexcite.Sex=='M']
allexcitef=allexcite[allexcite.Sex=='F']

In [7]:
cmale=cmelt[cmelt.Sex=='M']
cfemale=cmelt[cmelt.Sex=='F']
mmale=mmelt[mmelt.Sex=='M']
mfemale=mmelt[mmelt.Sex=='F']
hmale=hmelt[hmelt.Sex=='M']
hfemale=hmelt[hmelt.Sex=='F']

In [9]:
# odor='Hexanal05'
# kruskal(cmale[cmale.Odor==odor],mmale[mmale.Odor==odor],hmale[hmale.Odor==odor])


C:\Users\Annie\Anaconda2\lib\site-packages\scipy\stats\stats.py:257: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.
  "values. nan values will be ignored.", RuntimeWarning)
Out[9]:
KruskalResult(statistic=32751.358102116363, pvalue=0.0)

In [23]:
ecm=cmale[cmale.Peak>0]
ecf=cfemale[cfemale.Peak>0]
ecm.Peak.plot.hist(alpha=0.5);
ecf.Peak.plot.hist(alpha=0.5);
sns.set(style='white',palette="muted", color_codes=True);
sns.set_style("ticks")
plt.xlim(0,5);
plt.title('Control')
sns.despine()
plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\Control.pdf")



In [9]:
emm=mmale[mmale.Peak>0]
emf=mfemale[mfemale.Peak>0]
emm.Peak.plot.hist(alpha=0.5);
emf.Peak.plot.hist(alpha=0.5);
sns.set(style='white',palette="muted", color_codes=True);
sns.set_style("ticks")
plt.title('Mint')
plt.xlim(0,5);
sns.despine()
# plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\Mint.pdf")



In [10]:
ehm=hmale[hmale.Peak>0]
ehf=hfemale[hfemale.Peak>0]
ehm.Peak.plot.hist(alpha=0.5);
ehf.Peak.plot.hist(alpha=0.5);
sns.set(style='white',palette="muted", color_codes=True);
sns.set_style("ticks")
plt.xlim(0,5);
plt.title('Hexanal')
sns.despine()
# plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\Hexanal.pdf")



In [87]:
mfemale.Peak.plot.hist(alpha=0.5)
# mmale.Peak.plot.hist(alpha=1)
cfemale.Peak.plot.hist(alpha=0.5)
# cmale.Peak.plot.hist(alpha=0.5)
# hfemale.Peak.plot.hist(alpha=0.5)
# hmale.Peak.plot.hist(alpha=0.5)


Out[87]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ec9240>

In [61]:
kruskal(hmale.Peak,cmale.Peak.dropna())


Out[61]:
KruskalResult(statistic=477.478614715064, pvalue=7.5577836561211752e-106)

In [19]:
cat=('M','F')
sns.set(style="white", palette="muted", color_codes=True);
sns.set_context("paper", font_scale=1.3);
plt.figure(figsize=(8.5,5));
sns.boxplot(x='Odor',y='Peak',hue_order=cat,hue='Sex',data=mexcite);
# ax.legend_.remove()
sns.despine()
plt.ylabel('DF/F', fontsize=12);
plt.title('Peak DF/F, 1%, Mint', fontsize=12);
plt.xlabel('Odor', fontsize=12);
plt.ylim(-1,5)
plt.tight_layout()
plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\Mint_e.pdf")



In [20]:
at=('M','F')
sns.set(style="white", palette="muted", color_codes=True);
sns.set_context("paper", font_scale=1.3);
plt.figure(figsize=(9,5));
sns.boxplot(x='Odor',y='Peak',hue_order=cat,hue='Sex',data=hexcite);
# ax.legend_.remove()
sns.despine()
plt.ylabel('DF/F', fontsize=12);
plt.title('Peak DF/F, 1%, Hexanal', fontsize=12);
plt.xlabel('Odor', fontsize=12);
plt.ylim(-1,5)
plt.tight_layout()
plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\hexanal_e.pdf")



In [22]:
at=('M','F')
sns.set(style="white", palette="muted", color_codes=True);
sns.set_context("paper", font_scale=1.3);
plt.figure(figsize=(9,5));
sns.boxplot(x='Odor',y='Peak',hue_order=cat,hue='Sex',data=cexcite);
# ax.legend_.remove()
sns.despine()
plt.ylabel('DF/F', fontsize=12);
plt.title('Peak DF/F, 1%, Control', fontsize=12);
plt.xlabel('Odor', fontsize=12);
plt.ylim(-1,5)
plt.tight_layout()
plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\Control_e.pdf")



In [37]:
allexcitef.tail()


Out[37]:
Group Mouse Sex Odor Peak
5161 Hexanal G PMT (22)_160626_2 F Hexanal05 2.629784
5162 Hexanal G PMT (23)_160626_2 F Hexanal05 2.228448
5163 Hexanal G PMT (24)_160626_2 F Hexanal05 2.581529
5164 Hexanal G PMT (25)_160626_2 F Hexanal05 1.906673
5165 Hexanal G PMT (26)_160626_2 F Hexanal05 2.273161

In [43]:
# at=('M','F')
sns.set(style="white", palette="muted", color_codes=True);
sns.set_context("paper", font_scale=1.3);
plt.figure(figsize=(9,5));
sns.boxplot(x='Odor',y='Peak',hue='Group',data=allexcitef);
# ax.legend_.remove()
sns.despine()
plt.ylabel('DF/F', fontsize=12);
plt.title('Peak DF/F, 1%, Female', fontsize=12);
plt.xlabel('Odor', fontsize=12);
plt.ylim(-1,5)
plt.tight_layout()
plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\\allfemale.pdf")



In [44]:
# at=('M','F')
sns.set(style="white", palette="muted", color_codes=True);
sns.set_context("paper", font_scale=1.3);
plt.figure(figsize=(9,5));
sns.boxplot(x='Odor',y='Peak',hue='Group',data=allexcitem);
# ax.legend_.remove()
sns.despine()
plt.ylabel('DF/F', fontsize=12);
plt.title('Peak DF/F, 1%, Male', fontsize=12);
plt.xlabel('Odor', fontsize=12);
plt.ylim(-1,5)
plt.tight_layout()
plt.savefig("C:\Users\Annie\Desktop\Plasticity\Revision\SexDiff\\allmale.pdf")


Dendrogram


In [27]:
peak.head()


Out[27]:
Group Mouse IAA10 IAA01 AP MS10 IAA05 Hexanal01 EB MS01 PA MS05 Blank Hexanone Hexanal10 THA Hexanal05
0 Control G PMT (1)_160321_1 0.099708 0.188597 0.036738 0.150474 0.034294 0.062999 -0.061840 0.015671 0.037389 0.169456 NaN 0.093353 0.069948 0.067358 0.124211
1 Control G PMT (2)_160321_1 0.041676 0.188666 0.073576 0.074001 0.001814 0.014205 0.056893 -0.034636 -0.002271 0.139423 NaN -0.026087 0.036447 0.095266 0.101018
2 Control G PMT (3)_160321_1 0.287615 0.477065 0.058214 0.260675 0.127803 0.129928 0.153817 -0.044179 0.221412 0.095906 NaN 0.022945 0.263380 0.146676 0.293199
3 Control G PMT (4)_160321_1 0.034975 0.248613 0.110235 0.071141 0.104363 0.104142 0.082859 0.146939 0.051386 0.157300 NaN -0.057530 0.037260 0.094015 0.198974
4 Control G PMT (5)_160321_1 0.309290 0.393188 0.116399 0.220858 0.240154 0.315627 0.694684 0.268299 0.203381 0.233311 NaN -0.147715 0.145721 0.069755 0.538062

In [33]:
cpeak=peak[peak.Group=='Control']
mpeak=peak[peak.Group=='Mint']
hpeak=peak[peak.Group=='Hexanal']

In [34]:
X=peak[['IAA10','IAA01','AP','MS10','IAA05','Hexanal01','EB','MS01','PA','MS05','Hexanone','Hexanal10','THA','Hexanal05']]
C=cpeak[['IAA10','IAA01','AP','MS10','IAA05','Hexanal01','EB','MS01','PA','MS05','Hexanone','Hexanal10','THA','Hexanal05']]
M=mpeak[['IAA10','IAA01','AP','MS10','IAA05','Hexanal01','EB','MS01','PA','MS05','Hexanone','Hexanal10','THA','Hexanal05']]
H=hpeak[['IAA10','IAA01','AP','MS10','IAA05','Hexanal01','EB','MS01','PA','MS05','Hexanone','Hexanal10','THA','Hexanal05']]

In [35]:
Z=linkage(X)
CZ=linkage(C)
MZ=linkage(M)
HZ=linkage(H)

In [20]:
from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist

In [39]:
pd.DataFrame(HZ)


Out[39]:
0 1 2 3
0 238.0 239.0 0.100247 2.0
1 240.0 246.0 0.110695 2.0
2 338.0 339.0 0.113964 2.0
3 122.0 126.0 0.115886 2.0
4 216.0 225.0 0.115988 2.0
5 46.0 229.0 0.124773 2.0
6 331.0 333.0 0.128011 2.0
7 329.0 375.0 0.134020 3.0
8 130.0 137.0 0.136090 2.0
9 121.0 134.0 0.136509 2.0
10 242.0 243.0 0.137313 2.0
11 13.0 14.0 0.140912 2.0
12 210.0 211.0 0.145379 2.0
13 23.0 380.0 0.147315 3.0
14 372.0 377.0 0.148624 4.0
15 332.0 340.0 0.149896 2.0
16 72.0 78.0 0.150523 2.0
17 199.0 209.0 0.152150 2.0
18 233.0 369.0 0.152667 3.0
19 80.0 84.0 0.153193 2.0
20 230.0 374.0 0.153480 3.0
21 370.0 387.0 0.153750 5.0
22 271.0 272.0 0.153828 2.0
23 236.0 379.0 0.156002 3.0
24 120.0 378.0 0.158685 3.0
25 129.0 383.0 0.158714 5.0
26 237.0 390.0 0.159039 6.0
27 342.0 384.0 0.159053 3.0
28 67.0 82.0 0.159960 2.0
29 220.0 224.0 0.161044 2.0
... ... ... ... ...
338 695.0 703.0 0.725884 6.0
339 155.0 706.0 0.733385 333.0
340 287.0 708.0 0.743016 334.0
341 365.0 707.0 0.753560 7.0
342 699.0 709.0 0.754472 336.0
343 696.0 710.0 0.754626 9.0
344 141.0 711.0 0.756929 337.0
345 205.0 713.0 0.773977 338.0
346 366.0 712.0 0.782250 10.0
347 170.0 714.0 0.791842 339.0
348 351.0 715.0 0.800248 11.0
349 343.0 363.0 0.807511 2.0
350 358.0 716.0 0.816226 340.0
351 718.0 719.0 0.825686 342.0
352 190.0 720.0 0.858784 343.0
353 21.0 721.0 0.861141 344.0
354 267.0 717.0 0.873012 12.0
355 327.0 722.0 0.879714 345.0
356 326.0 724.0 0.886699 346.0
357 723.0 725.0 0.908697 358.0
358 266.0 726.0 0.909234 359.0
359 169.0 189.0 0.928817 2.0
360 295.0 727.0 0.943164 360.0
361 191.0 729.0 0.949116 361.0
362 581.0 730.0 1.067565 363.0
363 252.0 731.0 1.152794 364.0
364 182.0 728.0 1.161500 3.0
365 183.0 733.0 1.237995 4.0
366 732.0 734.0 1.438012 368.0
367 181.0 735.0 2.243542 369.0

368 rows × 4 columns


In [10]:
c, coph_dists=cophenet(Z,pdist(X))
c


Out[10]:
nan

In [30]:
pd.DataFrame(Z)


Out[30]:
0 1 2 3
0 732.0 733.0 0.100247 2.0
1 345.0 350.0 0.102388 2.0
2 734.0 740.0 0.110695 2.0
3 832.0 833.0 0.113964 2.0
4 426.0 719.0 0.114798 2.0
5 616.0 620.0 0.115886 2.0
6 710.0 867.0 0.115988 3.0
7 366.0 379.0 0.116976 2.0
8 123.0 132.0 0.118534 2.0
9 249.0 259.0 0.118667 2.0
10 51.0 872.0 0.121240 3.0
11 420.0 868.0 0.121326 3.0
12 390.0 396.0 0.123074 2.0
13 540.0 723.0 0.124773 2.0
14 199.0 430.0 0.127480 2.0
15 825.0 827.0 0.128011 2.0
16 216.0 224.0 0.133782 2.0
17 367.0 870.0 0.133796 3.0
18 823.0 878.0 0.134020 3.0
19 362.0 365.0 0.134205 2.0
20 624.0 631.0 0.136090 2.0
21 615.0 628.0 0.136509 2.0
22 736.0 737.0 0.137313 2.0
23 363.0 380.0 0.137672 2.0
24 210.0 211.0 0.139673 2.0
25 190.0 207.0 0.140415 2.0
26 507.0 508.0 0.140912 2.0
27 368.0 886.0 0.140936 3.0
28 425.0 435.0 0.141590 2.0
29 372.0 880.0 0.141912 4.0
... ... ... ... ...
832 635.0 1694.0 0.756929 825.0
833 488.0 1695.0 0.763333 826.0
834 699.0 1696.0 0.773977 827.0
835 860.0 1693.0 0.782250 10.0
836 515.0 1697.0 0.783003 828.0
837 664.0 1699.0 0.791842 829.0
838 845.0 1698.0 0.800248 11.0
839 837.0 857.0 0.807511 2.0
840 852.0 1700.0 0.816226 830.0
841 1702.0 1703.0 0.825686 832.0
842 820.0 1704.0 0.834828 833.0
843 684.0 1705.0 0.858784 834.0
844 761.0 1701.0 0.873012 12.0
845 821.0 1706.0 0.879714 835.0
846 1707.0 1708.0 0.908697 847.0
847 760.0 1709.0 0.909234 848.0
848 663.0 683.0 0.928817 2.0
849 171.0 1710.0 0.930779 849.0
850 789.0 1712.0 0.943164 850.0
851 685.0 1713.0 0.949116 851.0
852 141.0 1714.0 0.958611 852.0
853 88.0 1715.0 0.994161 853.0
854 89.0 1716.0 1.001268 854.0
855 1470.0 1717.0 1.067565 856.0
856 746.0 1718.0 1.152794 857.0
857 676.0 1711.0 1.161500 3.0
858 677.0 1720.0 1.237995 4.0
859 1719.0 1721.0 1.438012 861.0
860 675.0 1722.0 2.243542 862.0
861 70.0 1723.0 inf 863.0

862 rows × 4 columns


In [23]:
plt.figure(figsize=(25,10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(Z,leaf_rotation=90., leaf_font_size=8.)
plt.show()


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-13e399ac5af4> in <module>()
      3 plt.xlabel('sample index')
      4 plt.ylabel('distance')
----> 5 dendrogram(Z,leaf_rotation=90., leaf_font_size=8.)
      6 plt.show()

C:\Users\Annie\Anaconda2\lib\site-packages\scipy\cluster\hierarchy.pyc in dendrogram(Z, p, truncate_mode, color_threshold, get_leaves, orientation, labels, count_sort, distance_sort, show_leaf_counts, no_plot, no_labels, leaf_font_size, leaf_rotation, leaf_label_func, show_contracted, link_color_func, ax, above_threshold_color)
   2228                          "'bottom', or 'right'")
   2229 
-> 2230     is_valid_linkage(Z, throw=True, name='Z')
   2231     Zs = Z.shape
   2232     n = Zs[0] + 1

C:\Users\Annie\Anaconda2\lib\site-packages\scipy\cluster\hierarchy.pyc in is_valid_linkage(Z, warning, throw, name)
   1410             if ((Z[:, 0] < 0).any() or (Z[:, 1] < 0).any()):
   1411                 raise ValueError('Linkage %scontains negative indices.' %
-> 1412                                  name_str)
   1413             if (Z[:, 2] < 0).any():
   1414                 raise ValueError('Linkage %scontains negative distances.' %

ValueError: Linkage 'Z' contains negative indices.

In [ ]: