In [1]:
import pandas as pd
import pylab as pl
import numpy as np
from sklearn.cluster import *

In [2]:
# Names to attach to the headerless CSV's columns. The trailing '?' column
# looks like a class label -- NOTE(review): confirm against the data source.
cols = [
    'area', 'perimeter', 'compactness', 'lenght',
    'width', 'asymmetry', 'grove', '?',
]
# The file has no header row, so the names above are supplied explicitly and
# no column is promoted to the index.
seeds = pd.read_csv('seeds.csv', names=cols, header=None, index_col=None)

In [3]:
# Preview the first rows to check that the supplied column names line up with the data.
seeds.head()


c:\Users\timaeudg\Anaconda\lib\site-packages\pandas\core\config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
c:\Users\timaeudg\Anaconda\lib\site-packages\pandas\core\config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
Out[3]:
area perimeter compactness lenght width asymmetry grove ?
0 15.26 14.84 0.8710 5.763 3.312 2.221 5.220 1
1 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 1
2 14.29 14.09 0.9050 5.291 3.337 2.699 4.825 1
3 13.84 13.94 0.8955 5.324 3.379 2.259 4.805 1
4 16.14 14.99 0.9034 5.658 3.562 1.355 5.175 1

In [5]:
# Pull the two features used for clustering out of the frame as NumPy arrays.
x = seeds['asymmetry'].values
y = seeds['grove'].values

In [6]:
# First look at the two features before any clustering (x: asymmetry, y: grove).
pl.scatter(x,y, s=100)


Out[6]:
<matplotlib.collections.PathCollection at 0x151d67b8>

In [9]:
# Stack the two 1-D feature arrays into an (n_samples, 2) matrix for KMeans.
# Uses the explicit `np.` namespace: a bare `array` is only defined when the
# kernel was started in pylab mode and raises NameError on a fresh kernel.
X = np.array([x, y]).T

In [10]:
# Echo the feature matrix: one row per sample, columns are (asymmetry, grove).
X


Out[10]:
array([[ 2.221 ,  5.22  ],
       [ 1.018 ,  4.956 ],
       [ 2.699 ,  4.825 ],
       [ 2.259 ,  4.805 ],
       [ 1.355 ,  5.175 ],
       [ 2.462 ,  4.956 ],
       [ 3.586 ,  5.219 ],
       [ 2.7   ,  5.    ],
       [ 2.04  ,  5.877 ],
       [ 1.969 ,  5.533 ],
       [ 4.543 ,  5.314 ],
       [ 1.717 ,  5.001 ],
       [ 3.986 ,  4.738 ],
       [ 3.136 ,  4.872 ],
       [ 2.932 ,  4.825 ],
       [ 4.185 ,  4.781 ],
       [ 5.234 ,  4.781 ],
       [ 1.599 ,  5.046 ],
       [ 1.767 ,  4.649 ],
       [ 4.102 ,  4.914 ],
       [ 3.072 ,  5.176 ],
       [ 2.688 ,  5.219 ],
       [ 0.7651,  5.091 ],
       [ 1.415 ,  4.961 ],
       [ 1.791 ,  5.001 ],
       [ 0.903 ,  5.307 ],
       [ 3.373 ,  4.825 ],
       [ 2.504 ,  4.869 ],
       [ 2.754 ,  5.038 ],
       [ 3.531 ,  5.097 ],
       [ 0.8551,  5.056 ],
       [ 3.412 ,  5.228 ],
       [ 3.92  ,  5.299 ],
       [ 2.124 ,  5.012 ],
       [ 2.129 ,  5.36  ],
       [ 2.27  ,  5.443 ],
       [ 2.823 ,  5.527 ],
       [ 2.956 ,  5.484 ],
       [ 3.112 ,  5.309 ],
       [ 6.685 ,  5.001 ],
       [ 2.587 ,  5.178 ],
       [ 2.249 ,  5.176 ],
       [ 2.461 ,  4.783 ],
       [ 4.711 ,  5.528 ],
       [ 3.128 ,  5.18  ],
       [ 1.56  ,  4.961 ],
       [ 1.367 ,  5.132 ],
       [ 2.958 ,  5.175 ],
       [ 2.704 ,  5.111 ],
       [ 2.129 ,  5.351 ],
       [ 3.975 ,  5.144 ],
       [ 5.593 ,  5.136 ],
       [ 4.116 ,  5.396 ],
       [ 3.328 ,  5.224 ],
       [ 1.481 ,  5.487 ],
       [ 1.933 ,  5.439 ],
       [ 2.802 ,  5.044 ],
       [ 1.142 ,  5.088 ],
       [ 1.999 ,  5.222 ],
       [ 1.502 ,  4.519 ],
       [ 2.7   ,  4.607 ],
       [ 2.269 ,  4.703 ],
       [ 3.22  ,  4.605 ],
       [ 4.157 ,  5.088 ],
       [ 1.176 ,  4.782 ],
       [ 2.352 ,  4.607 ],
       [ 1.313 ,  5.15  ],
       [ 2.217 ,  5.132 ],
       [ 1.464 ,  5.3   ],
       [ 3.533 ,  5.067 ],
       [ 4.076 ,  6.06  ],
       [ 4.675 ,  5.877 ],
       [ 4.539 ,  5.791 ],
       [ 2.936 ,  6.079 ],
       [ 4.004 ,  5.841 ],
       [ 4.92  ,  5.795 ],
       [ 3.824 ,  5.922 ],
       [ 4.451 ,  6.451 ],
       [ 5.064 ,  6.362 ],
       [ 2.858 ,  5.746 ],
       [ 5.532 ,  5.88  ],
       [ 5.324 ,  5.879 ],
       [ 5.173 ,  6.187 ],
       [ 1.472 ,  6.273 ],
       [ 2.962 ,  6.185 ],
       [ 2.443 ,  6.197 ],
       [ 1.649 ,  6.109 ],
       [ 3.691 ,  6.498 ],
       [ 5.78  ,  6.231 ],
       [ 5.016 ,  6.321 ],
       [ 1.955 ,  6.449 ],
       [ 3.12  ,  6.053 ],
       [ 3.237 ,  6.053 ],
       [ 6.001 ,  5.877 ],
       [ 4.933 ,  6.448 ],
       [ 3.696 ,  5.967 ],
       [ 3.477 ,  6.238 ],
       [ 2.144 ,  6.453 ],
       [ 2.853 ,  6.273 ],
       [ 2.188 ,  6.097 ],
       [ 4.217 ,  5.618 ],
       [ 2.068 ,  5.837 ],
       [ 4.308 ,  6.009 ],
       [ 3.357 ,  6.229 ],
       [ 3.368 ,  6.148 ],
       [ 2.553 ,  5.879 ],
       [ 2.843 ,  6.2   ],
       [ 3.747 ,  5.929 ],
       [ 3.252 ,  6.55  ],
       [ 1.738 ,  5.894 ],
       [ 2.235 ,  5.794 ],
       [ 3.678 ,  5.965 ],
       [ 2.109 ,  5.924 ],
       [ 6.682 ,  6.053 ],
       [ 4.677 ,  6.316 ],
       [ 2.248 ,  6.163 ],
       [ 4.334 ,  5.75  ],
       [ 3.084 ,  6.185 ],
       [ 3.639 ,  5.966 ],
       [ 3.063 ,  6.32  ],
       [ 5.901 ,  6.188 ],
       [ 3.619 ,  6.011 ],
       [ 4.286 ,  5.703 ],
       [ 2.984 ,  5.905 ],
       [ 3.336 ,  5.144 ],
       [ 4.188 ,  5.992 ],
       [ 4.391 ,  6.102 ],
       [ 2.257 ,  5.919 ],
       [ 1.91  ,  6.185 ],
       [ 5.366 ,  5.661 ],
       [ 2.837 ,  5.962 ],
       [ 2.908 ,  5.949 ],
       [ 4.462 ,  5.795 ],
       [ 4.266 ,  5.795 ],
       [ 4.972 ,  5.847 ],
       [ 3.6   ,  5.439 ],
       [ 3.526 ,  5.971 ],
       [ 2.64  ,  5.879 ],
       [ 2.725 ,  5.752 ],
       [ 3.769 ,  5.922 ],
       [ 5.304 ,  5.395 ],
       [ 7.035 ,  5.44  ],
       [ 5.995 ,  5.307 ],
       [ 5.469 ,  5.221 ],
       [ 4.471 ,  5.178 ],
       [ 6.169 ,  5.275 ],
       [ 2.221 ,  5.132 ],
       [ 4.421 ,  5.002 ],
       [ 3.26  ,  5.316 ],
       [ 5.462 ,  5.194 ],
       [ 5.195 ,  5.307 ],
       [ 6.992 ,  5.27  ],
       [ 4.756 ,  5.36  ],
       [ 3.332 ,  5.001 ],
       [ 4.048 ,  5.263 ],
       [ 5.813 ,  5.219 ],
       [ 3.347 ,  5.003 ],
       [ 4.825 ,  5.22  ],
       [ 4.378 ,  5.31  ],
       [ 5.388 ,  5.31  ],
       [ 3.082 ,  5.491 ],
       [ 4.271 ,  5.308 ],
       [ 4.988 ,  5.046 ],
       [ 4.419 ,  5.176 ],
       [ 6.388 ,  5.049 ],
       [ 2.201 ,  5.056 ],
       [ 4.924 ,  5.27  ],
       [ 3.638 ,  5.338 ],
       [ 4.337 ,  5.132 ],
       [ 3.521 ,  5.088 ],
       [ 6.735 ,  5.163 ],
       [ 6.715 ,  4.956 ],
       [ 4.309 ,  5.    ],
       [ 5.588 ,  5.089 ],
       [ 5.182 ,  5.185 ],
       [ 4.773 ,  5.063 ],
       [ 5.335 ,  5.092 ],
       [ 4.702 ,  4.963 ],
       [ 5.876 ,  5.002 ],
       [ 1.661 ,  5.178 ],
       [ 4.957 ,  4.825 ],
       [ 4.987 ,  5.147 ],
       [ 4.857 ,  5.158 ],
       [ 5.209 ,  5.135 ],
       [ 6.185 ,  5.316 ],
       [ 4.062 ,  5.182 ],
       [ 4.898 ,  5.352 ],
       [ 4.179 ,  4.956 ],
       [ 7.524 ,  4.957 ],
       [ 4.975 ,  4.794 ],
       [ 5.398 ,  5.045 ],
       [ 3.985 ,  5.001 ],
       [ 3.597 ,  5.132 ],
       [ 4.853 ,  5.089 ],
       [ 4.132 ,  5.012 ],
       [ 4.873 ,  4.914 ],
       [ 5.483 ,  4.958 ],
       [ 4.67  ,  5.091 ],
       [ 3.306 ,  5.231 ],
       [ 2.828 ,  4.83  ],
       [ 5.472 ,  5.045 ],
       [ 2.3   ,  4.745 ],
       [ 4.051 ,  4.828 ],
       [ 8.456 ,  5.    ],
       [ 3.919 ,  5.001 ],
       [ 3.631 ,  4.87  ],
       [ 4.325 ,  5.003 ],
       [ 8.315 ,  5.056 ],
       [ 3.598 ,  5.044 ],
       [ 5.637 ,  5.063 ]])

In [11]:
# Two-cluster k-means. n_init=1000 requests 1000 runs from different centroid
# initialisations. NOTE(review): no random_state is passed, so labels and
# inertia may differ between runs.
km = KMeans(n_clusters = 2, n_init=1000)

In [12]:
# Fit on the unscaled (asymmetry, grove) matrix.
km.fit(X)


Out[12]:
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=2, n_init=1000,
    n_jobs=1, precompute_distances=True, random_state=None, tol=0.0001,
    verbose=0)

In [13]:
# Report the fitted model's inertia (lower means tighter clusters).
# Written as a call so the cell runs on both Python 2 and Python 3.
print(km.inertia_)


204.241183065

In [14]:
# Colour each sample by the cluster label assigned by the fitted model.
pl.scatter(X[:,0], X[:,1], c=km.labels_, s=100)


Out[14]:
<matplotlib.collections.PathCollection at 0x1548c048>

In [30]:
# Refit on the unscaled features with 15 clusters for comparison with k=2.
# n_init=10000 requests 10000 separate initialisations, so this cell is slow.
# NOTE(review): no random_state is passed, so results may differ between runs.
km = KMeans(n_clusters=15, n_init=10000)
km.fit(X)
# print() call form works on both Python 2 and Python 3.
print(km.inertia_)
# Scatter is the last expression so its return value remains the cell output.
pl.scatter(X[:, 0], X[:, 1], c=km.labels_, s=50)


17.8882682098
Out[30]:
<matplotlib.collections.PathCollection at 0x279ba080>

In [31]:
from sklearn.preprocessing import *

In [35]:
# Rescale each feature column to the [0, 1] range so neither feature
# dominates the distance computation. Despite the name, X_std is min-max
# scaled, not standardised to zero mean / unit variance.
scale = MinMaxScaler()
X_std = scale.fit(X).transform(X)

In [37]:
# Echo the rescaled matrix to confirm both columns now span [0, 1].
X_std


Out[37]:
array([[ 0.18930164,  0.34515017],
       [ 0.03288302,  0.21516494],
       [ 0.25145302,  0.1506647 ],
       [ 0.19424255,  0.14081733],
       [ 0.07670104,  0.3229936 ],
       [ 0.22063738,  0.21516494],
       [ 0.36678412,  0.3446578 ],
       [ 0.25158304,  0.23682915],
       [ 0.16576734,  0.66863614],
       [ 0.15653565,  0.49926145],
       [ 0.49121689,  0.39143279],
       [ 0.12376965,  0.23732152],
       [ 0.41879364,  0.10782866],
       [ 0.30827341,  0.17380601],
       [ 0.28174856,  0.1506647 ],
       [ 0.44466837,  0.12900049],
       [ 0.58106333,  0.12900049],
       [ 0.10842684,  0.25947809],
       [ 0.13027084,  0.06400788],
       [ 0.4338764 ,  0.19448548],
       [ 0.29995189,  0.32348597],
       [ 0.25002275,  0.3446578 ],
       [ 0.        ,  0.28163466],
       [ 0.08450246,  0.21762678],
       [ 0.13339141,  0.23732152],
       [ 0.01793028,  0.38798621],
       [ 0.33908905,  0.1506647 ],
       [ 0.22609838,  0.1723289 ],
       [ 0.25860432,  0.25553914],
       [ 0.35963281,  0.28458887],
       [ 0.01170214,  0.26440177],
       [ 0.34415998,  0.34908912],
       [ 0.41021207,  0.38404727],
       [ 0.17668933,  0.24273757],
       [ 0.17733945,  0.41408173],
       [ 0.19567281,  0.4549483 ],
       [ 0.26757597,  0.49630724],
       [ 0.28486913,  0.4751354 ],
       [ 0.30515284,  0.38897095],
       [ 0.76972786,  0.23732152],
       [ 0.23689035,  0.3244707 ],
       [ 0.19294231,  0.32348597],
       [ 0.22050735,  0.12998523],
       [ 0.51306089,  0.49679961],
       [ 0.30723322,  0.32545544],
       [ 0.10335591,  0.21762678],
       [ 0.07826132,  0.30182176],
       [ 0.28512918,  0.3229936 ],
       [ 0.25210313,  0.29148203],
       [ 0.17733945,  0.40965042],
       [ 0.41736338,  0.30773018],
       [ 0.62774188,  0.30379124],
       [ 0.43569673,  0.43180699],
       [ 0.33323798,  0.34711965],
       [ 0.09308403,  0.47661251],
       [ 0.15185479,  0.45297883],
       [ 0.26484547,  0.25849335],
       [ 0.04900597,  0.28015756],
       [ 0.16043636,  0.34613491],
       [ 0.09581453,  0.        ],
       [ 0.25158304,  0.04332841],
       [ 0.19554278,  0.09059577],
       [ 0.31919541,  0.04234367],
       [ 0.44102771,  0.28015756],
       [ 0.05342678,  0.12949286],
       [ 0.20633476,  0.04332841],
       [ 0.07124004,  0.31068439],
       [ 0.18878155,  0.30182176],
       [ 0.09087363,  0.38453964],
       [ 0.35989286,  0.26981782],
       [ 0.43049578,  0.75873954],
       [ 0.50838003,  0.66863614],
       [ 0.4906968 ,  0.62629247],
       [ 0.28226866,  0.76809453],
       [ 0.42113407,  0.65091088],
       [ 0.54023586,  0.62826194],
       [ 0.39772978,  0.69079271],
       [ 0.4792547 ,  0.95125554],
       [ 0.55895929,  0.90743476],
       [ 0.2721268 ,  0.60413589],
       [ 0.61981043,  0.67011324],
       [ 0.59276548,  0.66962088],
       [ 0.57313188,  0.82127031],
       [ 0.09191382,  0.86361398],
       [ 0.28564927,  0.82028557],
       [ 0.21816692,  0.82619399],
       [ 0.11492803,  0.78286558],
       [ 0.38043662,  0.97439685],
       [ 0.65205633,  0.84293452],
       [ 0.55271815,  0.88724766],
       [ 0.15471531,  0.9502708 ],
       [ 0.30619303,  0.75529296],
       [ 0.32140582,  0.75529296],
       [ 0.68079158,  0.66863614],
       [ 0.54192617,  0.94977843],
       [ 0.38108674,  0.71294929],
       [ 0.35261153,  0.84638109],
       [ 0.17928981,  0.95224028],
       [ 0.27147668,  0.86361398],
       [ 0.18501086,  0.77695716],
       [ 0.44882914,  0.54111275],
       [ 0.169408  ,  0.64894141],
       [ 0.4606613 ,  0.73362875],
       [ 0.33700867,  0.84194978],
       [ 0.33843893,  0.80206795],
       [ 0.23246954,  0.66962088],
       [ 0.27017644,  0.8276711 ],
       [ 0.38771795,  0.69423929],
       [ 0.32335617,  1.        ],
       [ 0.12650015,  0.6770064 ],
       [ 0.19112198,  0.62776957],
       [ 0.37874631,  0.71196455],
       [ 0.17473898,  0.69177745],
       [ 0.76933779,  0.75529296],
       [ 0.50864008,  0.88478582],
       [ 0.19281228,  0.80945347],
       [ 0.46404192,  0.60610537],
       [ 0.30151218,  0.82028557],
       [ 0.37367538,  0.71245692],
       [ 0.29878168,  0.88675529],
       [ 0.66778921,  0.82176268],
       [ 0.37107491,  0.73461349],
       [ 0.45780078,  0.58296406],
       [ 0.2885098 ,  0.68242245],
       [ 0.33427817,  0.30773018],
       [ 0.44505845,  0.72525849],
       [ 0.47145328,  0.77941901],
       [ 0.1939825 ,  0.68931561],
       [ 0.14886424,  0.82028557],
       [ 0.59822648,  0.56228459],
       [ 0.2693963 ,  0.71048744],
       [ 0.27862799,  0.70408666],
       [ 0.48068497,  0.62826194],
       [ 0.4552003 ,  0.62826194],
       [ 0.5469971 ,  0.65386509],
       [ 0.36860445,  0.45297883],
       [ 0.35898269,  0.71491876],
       [ 0.24378161,  0.66962088],
       [ 0.25483363,  0.6070901 ],
       [ 0.39057848,  0.69079271],
       [ 0.590165  ,  0.43131462],
       [ 0.81523619,  0.4534712 ],
       [ 0.68001144,  0.38798621],
       [ 0.61161893,  0.34564254],
       [ 0.48185518,  0.3244707 ],
       [ 0.70263558,  0.37223043],
       [ 0.18930164,  0.30182176],
       [ 0.47535399,  0.23781388],
       [ 0.32439636,  0.39241753],
       [ 0.61070876,  0.3323486 ],
       [ 0.57599241,  0.38798621],
       [ 0.80964517,  0.36976859],
       [ 0.51891196,  0.41408173],
       [ 0.33375808,  0.23732152],
       [ 0.42685511,  0.36632201],
       [ 0.65634711,  0.3446578 ],
       [ 0.33570843,  0.23830625],
       [ 0.5278836 ,  0.34515017],
       [ 0.46976297,  0.38946332],
       [ 0.601087  ,  0.38946332],
       [ 0.30125213,  0.47858198],
       [ 0.45585042,  0.38847858],
       [ 0.54907748,  0.25947809],
       [ 0.47509394,  0.32348597],
       [ 0.73111079,  0.26095519],
       [ 0.18670117,  0.26440177],
       [ 0.54075596,  0.36976859],
       [ 0.37354536,  0.40324963],
       [ 0.46443199,  0.30182176],
       [ 0.35833257,  0.28015756],
       [ 0.77622905,  0.31708518],
       [ 0.77362857,  0.21516494],
       [ 0.46079132,  0.23682915],
       [ 0.62709176,  0.28064993],
       [ 0.5743021 ,  0.32791728],
       [ 0.52112237,  0.26784835],
       [ 0.59419574,  0.28212703],
       [ 0.51189068,  0.21861152],
       [ 0.66453861,  0.23781388],
       [ 0.11648832,  0.3244707 ],
       [ 0.54504674,  0.1506647 ],
       [ 0.54894746,  0.30920729],
       [ 0.53204436,  0.31462334],
       [ 0.57781274,  0.30329887],
       [ 0.70471596,  0.39241753],
       [ 0.42867545,  0.32644018],
       [ 0.53737534,  0.41014279],
       [ 0.44388823,  0.21516494],
       [ 0.87881782,  0.21565731],
       [ 0.54738717,  0.13540128],
       [ 0.60238724,  0.25898572],
       [ 0.41866362,  0.23732152],
       [ 0.36821438,  0.30182176],
       [ 0.53152427,  0.28064993],
       [ 0.43777711,  0.24273757],
       [ 0.53412474,  0.19448548],
       [ 0.61343926,  0.21614968],
       [ 0.50772991,  0.28163466],
       [ 0.33037746,  0.35056622],
       [ 0.26822609,  0.15312654],
       [ 0.612009  ,  0.25898572],
       [ 0.19957352,  0.11127523],
       [ 0.42724519,  0.1521418 ],
       [ 1.        ,  0.23682915],
       [ 0.41008205,  0.23732152],
       [ 0.37263519,  0.17282127],
       [ 0.46287171,  0.23830625],
       [ 0.98166664,  0.26440177],
       [ 0.36834441,  0.25849335],
       [ 0.63346292,  0.26784835]])

In [40]:
# Repeat the two-cluster fit on the min-max scaled features.
# NOTE(review): no random_state is passed, so results may differ between runs.
km = KMeans(n_clusters=2, n_init=1000)
km.fit(X_std)
# Inertia here is measured in scaled units, so it is not directly comparable
# to the values obtained on the unscaled X. print() call form works on both
# Python 2 and Python 3.
print(km.inertia_)
# Points are drawn at their original (unscaled) coordinates but coloured by
# the labels learned on X_std.
pl.scatter(X[:, 0], X[:, 1], c=km.labels_, s=50)


10.279566141
Out[40]:
<matplotlib.collections.PathCollection at 0x275f2f60>

In [ ]: