In [ ]:
#Clustering multivarié de données racinaires - Sixtine

Importing libraries and declaring data location:


In [1]:
import csv
import numpy as np
%matplotlib inline
import matplotlib.pylab as plt

# Directory holding the input CSV; it is also where the saved MVPD object and
# the figures are written. It is joined to file names by plain concatenation
# (csv_path + name), hence the trailing "/".
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
csv_path = "/projects/Jonathan/VP/Sixtine/"
# Name of the CSV file (read with ';' as delimiter) located in csv_path.
csv_fname = "AllLateralRootsbis.csv"

# MultiVariate Pairwise Distance matrix object filename:
mvpd_fname = "mvpd_AllLateralRootsbis.pkz"

Functions used to make the data accessible to Python:


In [2]:
def decimal_change(x):
    """Return the string `x` with every comma replaced by a dot.

    Used to turn decimal commas ('3,14') into decimal points ('3.14').
    """
    return x.replace(',', '.')


def replaceNA(x):
    """Return 'nan' if the field `x` is the missing-value marker 'NA', else `x`.

    The field is compared after ignoring surrounding whitespace. A blind
    substring replacement would also corrupt text labels that merely contain
    the letters "NA" (e.g. 'DNA1' would become 'Dnan1'), so only a field that
    consists of 'NA' alone is converted.
    """
    return 'nan' if x.strip() == 'NA' else x


def strip_white(x):
    """Return the string `x` with every space character removed."""
    return x.replace(' ', '')

Importing the data:


In [3]:
# Read the ';'-delimited CSV file: the first row supplies the column names,
# every following row is stored as a tuple of cleaned strings.
dataframe = []
with open(csv_path + csv_fname, 'rb') as f:
    reader = csv.reader(f, delimiter=';')
    for row_number, row in enumerate(reader):
        if row_number == 0:
            # Header line: keep the column names.
            headers = row
            continue
        # Per field: decimal_change first, then replaceNA, then strip_white.
        cleaned = [strip_white(replaceNA(decimal_change(field))) for field in row]
        dataframe.append(tuple(cleaned))

#dataframe[:2]

In [4]:
# -- Declaring the columns types and headers:
# One Python type per CSV column, in file order. The builtin `str` is used
# instead of the `np.str` alias (deprecated in NumPy 1.20, removed in 1.24);
# both names refer to the same type.
# NOTE(review): the unsized string field ends up one byte wide (see the '|S1'
# dtype reported for `d.root`) -- declare an explicit width (e.g. 'S16') if
# the root labels are needed.
form_types = [int,int,int,str,float,float,int,float,float,float]
# Structured array built from the first 100 rows only. list(...) materialises
# the (name, type) pairs so that the dtype is also valid where zip() returns an
# iterator (Python 3); on Python 2 it is a no-op copy.
data = np.array(dataframe[:100], dtype=list(zip(headers,form_types)))

Creating a python variable with observation names as attribute (np.recarray):


In [5]:
# View the structured array as a record array so that each field can be
# reached as an attribute (d.day, d.root, d.growth, ...).
d = data.view(np.recarray)
# Shape of the 'day' field, i.e. one entry per loaded row.
d.day.shape


Out[5]:
(100,)

In [6]:
# Inspect the 'root' field of the record array.
d.root


Out[6]:
chararray(['\xc8', '\x14', '3', 's', '\xda', '\x91', 'W', 'i', '\x7f', '\x7f',
       '\xdd', '<', 'k', '\x17', 'Q', '~', '\x1e', '-', '\xda', '\xc2',
       '\x17', 'n', '(', 't', '\xf5', 'D', 'M', 'M', 's', 'e', 'e', '\xea',
       '\xaa', '\x11', '\x92', '\xa7', '\xd7', '_', ',', '\xa7', '\xac',
       '\xfa', '\xbb', '\xda', '\xfa', 'X', 'H', '\xdc', 'x', '%', '\xc8',
       '', '\x1d', '\xff', '\x98', 'a', '\xfc', 'U', 'g', '\xef', '\x81',
       '\xde', '\x9d', '\xa0', '%', 'l', '\xe5', '\xb9', '\xb4', '', 'I',
       '\x08', '\xdd', '\xed', 'x', '\x86', '\xb8', 'W', '\xdb', '\x80',
       '\x80', '\xd8', '\xf2', '\xe6', '\x04', 'R', '\xfe', '#', '\xf6',
       '\xf6', '\x99', '\x13', '\x1a', '+', '\x1c', '\x1c', 'q', '2',
       '\x99', 'y'], 
      dtype='|S1')

Computing the MultiVariate Pairwise Distance matrix:


In [7]:
from openalea.stat_tool.multivariate_clustering import mvpd_matrix

MVPD matrix - Step #1: initialisation


In [8]:
# A basic initialisation of the 'mvpd_matrix' object can be done as follows
# (no argument is passed here):
mvpd = mvpd_matrix()

MVPD matrix - Step #2: computing separate pairwise distance matrices


In [9]:
# ...but the object is empty, so the variables have to be added one by one
# with 'add_variable' (observations, variable name, variable type and unit):
mvpd.add_variable(var_data=d.insertion_position, var_name="insertion_position", var_type="Numeric", var_unit="mm")
# The unit below is the unicode string "mm.h" followed by U+207B (superscript
# minus) and U+00B9 (superscript one).
mvpd.add_variable(var_data=d.growth, var_name="growth", var_type="Numeric", var_unit=u"mm.h\u207B\u00B9")


Computing the pairwise distance matrix for the variable 'insertion_position'...
Computing the pairwise distance matrix for the variable 'growth'...
Out[9]:
"Done adding and creating the pairwise distance matrix for the variable 'growth'."

MVPD matrix - Step #3: standardisation and linear combination of the variables:


In [10]:
# ...and finally the two variables are assembled into one MVPD matrix with
# 'create_mvpd_matrix'; the second list gives one coefficient per variable
# (here 0.5 each):
mvpd.create_mvpd_matrix(
    ["insertion_position", "growth"],
    [0.5, 0.5],
)


Computing the multi-variate pairwise distance matrix...
...done !

Combining steps #1 & #2:


In [11]:
# Steps #1 and #2 can be done in a single call by handing the data, the names
# and the types of the variables to the constructor:
mvpd = mvpd_matrix(
    [d.insertion_position, d.growth],
    ["insertion_position", "growth"],
    ["Numeric", "Numeric"],
)


Computing the pairwise distance matrix for the variable 'insertion_position'...
Computing the pairwise distance matrix for the variable 'growth'...
Done integrating variables ['insertion_position', 'growth'] as separate NON-standardised pairwise distance matrices.

Combining steps #1, #2 & #3:


In [12]:
# All the steps leading to the standardised MVPD matrix can be done AT ONCE by
# also passing one coefficient per variable as fourth argument:
mvpd = mvpd_matrix(
    [d.insertion_position, d.growth],
    ["insertion_position", "growth"],
    ["Numeric", "Numeric"],
    [0.5, 0.5],
)


Computing the pairwise distance matrix for the variable 'insertion_position'...
Computing the pairwise distance matrix for the variable 'growth'...
Done integrating variables ['insertion_position', 'growth'] as separate NON-standardised pairwise distance matrices.
Computing the multi-variate pairwise distance matrix...
...done !

RUN ONLY TO SAVE...


In [13]:
# Save the mvpd_matrix object under the file name csv_path + mvpd_fname.
mvpd.save_mvpd(csv_path+mvpd_fname)


Out[13]:
'0.001s to save the mvpd_matrix under filename /projects/Jonathan/VP/Sixtine/mvpd_AllLateralRootsbis.pkz'

LOAD the mvpd_matrix object


In [14]:
from openalea.stat_tool.multivariate_clustering import mvpd_matrix, load_mvpd
# Load an mvpd_matrix object back from the file csv_path + mvpd_fname.
mvpd = load_mvpd(csv_path+mvpd_fname)


Trying to open the mvpd_matrix file /projects/Jonathan/VP/Sixtine/mvpd_AllLateralRootsbis.pkz...
Time to load the mvpd_matrix: 0.001s

Adding variables and computing another standardised MVPD matrix:


In [15]:
# It is still possible to add another variable using 'add_variable'.
mvpd.add_variable(var_data=d.parent_length, var_name="parent_length", var_type="Numeric", var_unit="mm")

# To compute a new mvpd matrix, then simply do:
mvpd.create_mvpd_matrix(["parent_length", "growth"], [1/2.,1/2.])
# This method allows a faster creation of the standardised mvpd matrix, since
# it uses the existing pairwise distance matrices saved within the object!


Computing the pairwise distance matrix for the variable 'parent_length'...
Computing the multi-variate pairwise distance matrix...
...done !

In [16]:
# Build a fresh standardised MVPD matrix from three variables, each with a
# coefficient of 1/3.
# The call that used to precede this statement,
#     mvpd.add_variable(var_data=d.parent_length, var_name="length", ...)
# has been removed: it stored the 'parent_length' data under the name "length"
# in an object that the assignment below replaces straight away, so it only
# cost one extra pairwise-distance computation.
mvpd = mvpd_matrix([d.insertion_position, d.length, d.growth], ["insertion_position", "length", "growth"],
            ["Numeric","Numeric","Numeric"], [1/3.,1/3.,1/3.])


Computing the pairwise distance matrix for the variable 'length'...
Computing the pairwise distance matrix for the variable 'insertion_position'...
Computing the pairwise distance matrix for the variable 'length'...
Computing the pairwise distance matrix for the variable 'growth'...
Done integrating variables ['insertion_position', 'length', 'growth'] as separate NON-standardised pairwise distance matrices.
Computing the multi-variate pairwise distance matrix...
...done !

Plotting the MultiVariate Pairwise Distance matrix:


In [17]:
# Draw the global distance matrix held by the mvpd object as an image, on a
# new 6x6 inch figure.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(mvpd._global_distance_matrix)


Out[17]:
<matplotlib.image.AxesImage at 0x7f210e708d90>

Clustering the MultiVariate Pairwise Distance matrix:

The agglomerative clustering performs a hierarchical clustering using a bottom up approach: each observation starts in its own cluster, and clusters are successively merged together.
The linkage criterion determines the metric used for the merge strategy:

  • Ward minimizes the sum of squared differences within all clusters. It is a variance-minimizing approach and in this sense is similar to the k-means objective function but tackled with an agglomerative hierarchical approach.
  • Maximum or complete linkage minimizes the maximum distance between observations of pairs of clusters.
  • Average linkage minimizes the average of the distances between all observations of pairs of clusters.

Agglomerative clustering can also scale to a large number of samples when it is used jointly with a connectivity matrix, but it is computationally expensive when no connectivity constraints are added between samples: it considers all the possible merges at each step.

Using the Agglomerative hierarchical method of Ward clustering (metric=inertia):


In [18]:
# Cluster the observations into 5 clusters with the "ward" method.
mvpd.cluster(5, method = "ward")
#mvpd._clustering


Out[18]:
'Done computing ward clustering for 5 clusters.'

Using Spectral clustering, i.e. a low-dimension embedding followed by a K-Means (metric=Euclidean):

SpectralClustering does a low-dimension embedding of the affinity matrix between samples, followed by a KMeans in the low dimensional space. SpectralClustering requires the number of clusters to be specified. It works well for a small number of clusters but is not advised when using many clusters.
Note that if the values of your similarity matrix are not well distributed, e.g. with negative values or with a distance matrix rather than a similarity, the spectral problem will be singular and the problem not solvable.

WARNING: spectral clustering works on an affinity or similarity matrix, and we have previously computed a distance matrix!!
Transforming distances into well-behaved similarities is not necessarily easy and should be done with care. By default we have used the transformation proposed by scikit-learn: similarity = np.exp(-beta * distance / distance.std()), with beta = 1


In [19]:
# Cluster the observations into 5 clusters with the "spectral" method.
# NOTE(review): presumably this replaces the clustering stored by the previous
# 'ward' call -- confirm against mvpd_matrix.cluster.
mvpd.cluster(5, method = "spectral")


Out[19]:
'Done computing spectral clustering for 5 clusters.'

Checking the clustering output using 'ClustererChecker':


In [20]:
from openalea.stat_tool.multivariate_clustering import ClustererChecker
# Build the checker object from the clustered mvpd_matrix.
cc = ClustererChecker(mvpd)


ClustererChecker object initialisation done!

Global information about the clustering output:


In [21]:
# Global summary of the clustering: a pair of distance values is returned.
# NOTE(review): presumably (within-cluster, between-cluster) -- confirm.
cc.global_cluster_distances()


Out[21]:
(0.91111120346147312, 1.4410033216006157)

Cluster-to-cluster information:


In [22]:
# Within-cluster distance, returned as a dictionary with one value per cluster id.
cc.within_cluster_distances()


Out[22]:
{0: 0.33507852909013752,
 1: 0.26435402712359557,
 2: 1.0079810995398923,
 3: 0.93615639077013912,
 4: 0.6877284640890462}

In [23]:
# Between-cluster distance, returned as a dictionary with one value per cluster id.
cc.between_cluster_distances()


Out[23]:
{0: 0.89597744863352391,
 1: 1.112961740525555,
 2: 1.1165583659292277,
 3: 1.4491647003363346,
 4: 1.2649243226995595}

In [24]:
# Compute the cluster distance matrix:
cc.cluster_distance_matrix()
# Display it as a heat-map (with the values printed inside):
cc.plot_cluster_distances(print_values=True)
# Same plot, saved as 'test_ClustCheck.png' in the csv_path directory:
cc.plot_cluster_distances(print_values=True,savefig=csv_path+'test_ClustCheck.png')


Individual-to-cluster information:


In [25]:
# Compute the distance of each individual (root) to each cluster center.
# The result is a dictionary: individual index -> array with one distance per cluster.
cc.vertex2clusters_distance()


Out[25]:
{0: array([ 1.93616987,  1.44911106,  1.2819597 ,  1.27030683,  1.42562793]),
 1: array([ 0.84880346,  0.22762392,  0.77918799,  0.73618181,  0.59251253]),
 2: array([ 0.83529526,  0.17631328,  0.8078843 ,  0.75851954,  0.60925175]),
 3: array([ 0.54380091,  0.30201266,  0.81624911,  0.69206734,  0.58617088]),
 4: array([ 0.53537403,  0.26759223,  0.79000696,  0.65681084,  0.55297361]),
 5: array([ 0.58839597,  0.21041423,  0.71390956,  0.55434711,  0.46916592]),
 6: array([ 3.07271237,  5.17664121,  2.71391513,  6.70963229,  9.24524988]),
 7: array([ 0.8400115 ,  0.17815985,  0.83598103,  0.78329363,  0.63606766]),
 8: array([ 0.8230974 ,  0.16645586,  0.81763151,  0.75992703,  0.61244369]),
 9: array([ 0.79991934,  0.16911201,  0.77381874,  0.71333945,  0.56404101]),
 10: array([ 0.57442436,  0.2198626 ,  0.76780816,  0.61366027,  0.52155975]),
 11: array([ 0.57692251,  0.21578679,  0.76243916,  0.60701693,  0.51522648]),
 12: array([ 0.58992946,  0.21250684,  0.70619778,  0.5473026 ,  0.46170877]),
 13: array([ 0.76097648,  0.34525795,  0.58471922,  0.42686521,  0.35317189]),
 14: array([ 0.79010191,  0.34403986,  0.5914663 ,  0.44583249,  0.35942305]),
 15: array([ 1.12170747,  0.62838918,  0.73066173,  0.43205314,  0.51951697]),
 16: array([ 1.26500189,  1.72145336,  1.3331641 ,  1.04810677,  2.03696298]),
 17: array([ 0.86405178,  0.32380182,  0.65047685,  0.58532529,  0.46055989]),
 18: array([ 0.54714847,  0.27742355,  0.8036047 ,  0.67126976,  0.56799587]),
 19: array([ 0.55273512,  0.26588142,  0.79873972,  0.66229674,  0.55894647]),
 20: array([ 0.58233844,  0.20995142,  0.75354752,  0.59580001,  0.50754091]),
 21: array([ 0.63804599,  0.26875828,  0.62565849,  0.47858548,  0.38095007]),
 22: array([ 0.8168218 ,  0.33128095,  0.60346876,  0.48399462,  0.38019892]),
 23: array([ 1.12156162,  0.27253348,  1.85639092,  5.03487282,  2.88384149]),
 24: array([ 0.86353836,  0.33581688,  0.62828307,  0.5395642 ,  0.42669444]),
 25: array([ 1.00239625,  0.46397747,  1.65814007,  3.89932774,  1.70549092]),
 26: array([ 1.11290205,  0.26364469,  1.87245383,  5.06885328,  2.92087789]),
 27: array([ 1.0352539 ,  0.33162515,  1.71432518,  4.43337828,  2.26064413]),
 28: array([ 0.30566782,  0.55801859,  0.98853984,  0.84431537,  0.77138215]),
 29: array([ 0.30611991,  0.45182214,  0.74902629,  0.61489931,  0.51701782]),
 30: array([ 0.30722404,  0.46111924,  0.73748282,  0.60262476,  0.50426503]),
 31: array([ 0.28371932,  0.49372111,  0.78017943,  0.63878641,  0.5411935 ]),
 32: array([ 0.30069715,  0.44790225,  0.78185367,  0.64566677,  0.5503267 ]),
 33: array([ 0.29132629,  0.46731152,  0.77953419,  0.64108336,  0.54473989]),
 34: array([ 0.36146217,  0.55485847,  0.63290537,  0.47531783,  0.40615937]),
 35: array([ 0.35297298,  0.51378349,  0.64006727,  0.50399909,  0.41801892]),
 36: array([ 0.36451894,  0.5037452 ,  0.63048005,  0.49752632,  0.41134867]),
 37: array([ 0.36092062,  0.47891847,  0.64326197,  0.51724452,  0.41997825]),
 38: array([ 0.47207254,  0.53851991,  0.58358355,  0.38584312,  0.35292299]),
 39: array([ 0.36477628,  0.53383744,  0.62781388,  0.48192341,  0.40655103]),
 40: array([ 0.51145724,  0.63741113,  0.61473959,  0.33540736,  0.36315205]),
 41: array([ 0.58804519,  0.64339476,  0.61198764,  0.29489115,  0.3443239 ]),
 42: array([ 0.60065506,  0.59202037,  0.59262735,  0.28836538,  0.32537809]),
 43: array([ 0.49079922,  0.44420246,  0.56526195,  0.40625506,  0.33574729]),
 44: array([ 0.58035813,  0.63505376,  0.60812202,  0.29766305,  0.34284535]),
 45: array([ 0.46475095,  0.41722066,  0.5796741 ,  0.44246889,  0.35400831]),
 46: array([ 0.48847662,  0.40273901,  0.57377058,  0.42743677,  0.33996652]),
 47: array([ 0.69180199,  1.0639702 ,  1.36740418,  2.40454797,  0.60041986]),
 48: array([ 0.57445415,  0.49949768,  0.56951045,  0.31705705,  0.30782758]),
 49: array([ 0.75989839,  1.40699068,  1.21929921,  1.51970622,  1.28158435]),
 50: array([ 0.87167546,  1.59636292,  1.21630987,  0.97535225,  1.74553493]),
 51: array([ 0.79995696,  1.11652591,  1.3122237 ,  2.19876646,  0.60962738]),
 52: array([ 1.11610038,  2.42808291,  1.32016521,  1.43250162,  3.79924546]),
 53: array([ 0.63427952,  0.27567311,  0.61908276,  0.46922513,  0.37447893]),
 54: array([ 1.06740146,  2.1073689 ,  1.28940486,  0.81477591,  2.9420242 ]),
 55: array([ 1.3628919 ,  2.34873554,  1.42026465,  1.15898183,  3.39543243]),
 56: array([ 1.1911106 ,  1.27436984,  1.41844572,  2.25353766,  0.62923128]),
 57: array([ 1.85689158,  3.17063999,  1.71627884,  2.82082588,  5.19781448]),
 58: array([  4.22837998,   6.96849358,   3.86240074,  10.11628399,  12.78462826]),
 59: array([ 1.54244659,  1.92428426,  1.50368166,  1.48493786,  2.03722862]),
 60: array([ 1.1545281 ,  0.42663099,  1.71152066,  4.93260207,  2.80714959]),
 61: array([ 0.31277089,  0.43973767,  0.82210824,  0.68741349,  0.59328798]),
 62: array([ 0.42068682,  0.5489837 ,  0.59857304,  0.42571804,  0.37733401]),
 63: array([ 0.47735234,  0.33898833,  0.81293956,  0.70346649,  0.60068581]),
 64: array([ 0.81117024,  1.75290579,  1.21096274,  0.79366166,  2.14313197]),
 65: array([ 0.71598081,  0.94402181,  1.40829873,  2.67747335,  0.61919085]),
 66: array([ 0.95010592,  1.8597968 ,  1.23876581,  0.6698339 ,  2.36872407]),
 67: array([ 1.15570118,  2.01492577,  1.31906691,  0.71480382,  2.60010913]),
 68: array([ 1.17883824,  1.06106104,  1.46200074,  2.74615461,  0.72552841]),
 69: array([ 1.12462119,  0.27501962,  1.86174897,  5.07310081,  2.92356129]),
 70: array([ 1.21078677,  0.99410028,  1.49397768,  2.96118723,  0.89327006]),
 71: array([ 0.36694403,  0.85744642,  1.28463371,  1.15916185,  1.09849538]),
 72: array([ 0.33180508,  0.80832124,  1.23525753,  1.10665851,  1.04394645]),
 73: array([ 0.40137577,  0.89292447,  1.3239935 ,  1.20101443,  1.14197857]),
 74: array([ 0.39918616,  0.88900597,  1.321634  ,  1.19850549,  1.13937188]),
 75: array([ 0.35327349,  0.84183947,  1.26522056,  1.1385192 ,  1.07704847]),
 76: array([ 1.06147903,  0.30511206,  1.74531543,  4.73378735,  2.57246134]),
 77: array([ 3.97690184,  3.47496615,  3.83570173,  8.04480818,  6.29569618]),
 78: array([ 0.74105536,  0.17358853,  0.76268052,  0.66979076,  0.53577749]),
 79: array([ 1.11290958,  0.26364408,  1.87245603,  5.06885216,  2.92087611]),
 80: array([ 1.03859507,  0.39498962,  1.6477797 ,  4.28148012,  2.10282724]),
 81: array([ 0.2751525 ,  0.81834258,  1.10795996,  0.97129877,  0.90331295]),
 82: array([ 0.25601512,  0.7577406 ,  1.0931389 ,  0.95553904,  0.8869392 ]),
 83: array([ 0.2483474 ,  0.74293537,  1.07638924,  0.93772857,  0.86843482]),
 84: array([ 0.24157499,  0.7284668 ,  1.05804278,  0.91822016,  0.84816635]),
 85: array([ 0.26350227,  0.81858703,  0.93482426,  0.77914501,  0.72648211]),
 86: array([ 0.25090981,  0.78665967,  0.94224372,  0.79250339,  0.72762456]),
 87: array([ 0.27987225,  0.82967747,  0.8982842 ,  0.72187665,  0.68605571]),
 88: array([ 0.24010723,  0.71893176,  0.9110825 ,  0.76190853,  0.68606451]),
 89: array([ 0.32166619,  0.8475401 ,  0.86102284,  0.62191875,  0.62849718]),
 90: array([ 0.27296967,  0.7671844 ,  0.84635253,  0.67184559,  0.62890633]),
 91: array([ 0.30855657,  0.79468127,  0.82515084,  0.60831994,  0.59791915]),
 92: array([ 0.26645461,  0.71339785,  0.82340606,  0.65911677,  0.60419856]),
 93: array([ 0.63150406,  2.26146608,  1.42444743,  0.91656034,  2.85992089]),
 94: array([ 0.78524812,  2.19776984,  1.35821489,  0.7687642 ,  2.6371358 ]),
 95: array([ 1.00326222,  3.313637  ,  1.54266013,  3.13107826,  5.57646833]),
 96: array([ 0.59557799,  1.32203287,  1.46261822,  2.56529929,  0.66659705]),
 97: array([ 0.70254375,  1.89919248,  1.32615211,  1.00437799,  1.89814171]),
 98: array([ 0.52354097,  0.81643419,  1.61284965,  3.78921249,  1.6610854 ]),
 99: array([ 0.77084515,  1.55427599,  1.42671045,  2.09148792,  0.75796283])}

In [26]:
# Compute the distance of each individual (root) to the center of ITS OWN cluster.
# The result is a dictionary: individual index -> distance.
cc.vertex_distance2cluster_center()


Out[26]:
{0: 1.2819596976789933,
 1: 0.22762391585259648,
 2: 0.17631327503546101,
 3: 0.30201266402588628,
 4: 0.26759223169787805,
 5: 0.21041423190358133,
 6: 2.7139151326832232,
 7: 0.17815984912997371,
 8: 0.16645585968660659,
 9: 0.16911200952415981,
 10: 0.21986260069655816,
 11: 0.21578679189316693,
 12: 0.2125068356039197,
 13: 0.58471921635250246,
 14: 0.59146630439909365,
 15: 0.73066172608102053,
 16: 1.048106769154125,
 17: 0.65047684914666248,
 18: 0.27742354899857102,
 19: 0.26588142469125464,
 20: 0.20995142007197531,
 21: 0.62565849029474585,
 22: 0.60346876170737662,
 23: 0.27253347696210178,
 24: 0.62828306613264839,
 25: 0.46397747139598772,
 26: 0.26364468621538079,
 27: 0.33162514945170124,
 28: 0.30566782328495373,
 29: 0.3061199148971519,
 30: 0.30722404057317759,
 31: 0.2837193177642765,
 32: 0.30069715060809171,
 33: 0.29132629252677911,
 34: 0.63290536602278868,
 35: 0.64006726667419844,
 36: 0.63048005245368011,
 37: 0.6432619675164768,
 38: 0.58358354635830711,
 39: 0.6278138776451081,
 40: 0.61473959278790624,
 41: 0.61198764033593323,
 42: 0.59262735101005437,
 43: 0.56526194878363145,
 44: 0.60812201851957826,
 45: 0.57967409682398596,
 46: 0.57377057978996981,
 47: 0.60041985883618898,
 48: 0.56951045428017433,
 49: 1.2192992090330095,
 50: 0.97535224986859714,
 51: 0.60962737795584088,
 52: 1.432501622447758,
 53: 0.61908276488152436,
 54: 0.81477591161462004,
 55: 1.1589818275719002,
 56: 0.6292312768579128,
 57: 1.7162788359822529,
 58: 3.8624007433184135,
 59: 1.5036816603039596,
 60: 0.42663098953965672,
 61: 0.31277089178030953,
 62: 0.59857303888554769,
 63: 0.3389883308711526,
 64: 0.79366165863961502,
 65: 0.61919084894682774,
 66: 0.66983390232895401,
 67: 0.71480381748471589,
 68: 0.72552840632876858,
 69: 0.27501961714896644,
 70: 0.8932700632607008,
 71: 0.36694402938186632,
 72: 0.33180508367897943,
 73: 0.40137577329368995,
 74: 0.39918615675523594,
 75: 0.35327348860229635,
 76: 0.30511206191328483,
 77: 3.83570173031401,
 78: 0.17358852845970654,
 79: 0.26364408252305499,
 80: 0.39498962479730304,
 81: 0.27515249556541649,
 82: 0.25601511738445437,
 83: 0.24834739747845355,
 84: 0.24157499123683196,
 85: 0.26350226845004465,
 86: 0.25090980741900465,
 87: 0.27987224779308678,
 88: 0.24010723129060904,
 89: 0.32166619119413431,
 90: 0.27296967247347032,
 91: 0.30855657344460236,
 92: 0.26645460915994701,
 93: 0.91656033959867034,
 94: 0.76876420480489194,
 95: 1.0032622203899653,
 96: 0.66659705059522989,
 97: 1.0043779949576814,
 98: 0.52354096991674581,
 99: 0.75796282993090014}

In [27]:
# Display the distance of each individual to its cluster center, with the
# individuals sorted:
cc.plot_vertex_distance2cluster_center()



In [28]:
# By giving a filename (and the right path), the figure will be saved on the drive:
cc.plot_vertex_distance2cluster_center(savefig=csv_path+'roots2cluster_center.png')

Displaying information about the distribution of values within each cluster:


In [29]:
# The two statements below were fused on a single line, which is a SyntaxError;
# they are now separate statements.
# Look up the "variables" entry of the checker's clustering information.
# NOTE: as this is not the last expression of the cell, the notebook does not
# echo its value; wrap it in print(...) if it has to be displayed.
cc.info_clustering["variables"]
# Box-plots of the properties, drawn by cluster.
cc.properties_boxplot_by_cluster()


/usr/lib/pymodules/python2.7/matplotlib/axes.py:6031: RuntimeWarning: invalid value encountered in less_equal
  wisk_hi = np.compress(d <= hi_val, d)
/usr/lib/pymodules/python2.7/matplotlib/axes.py:6039: RuntimeWarning: invalid value encountered in greater_equal
  wisk_lo = np.compress(d >= lo_val, d)
/usr/lib/pymodules/python2.7/matplotlib/axes.py:6051: RuntimeWarning: invalid value encountered in greater
  flier_hi = np.compress(d > wisk_hi, d)
/usr/lib/pymodules/python2.7/matplotlib/axes.py:6052: RuntimeWarning: invalid value encountered in less
  flier_lo = np.compress(d < wisk_lo, d)