In [1]:
import pandas as pd
import pylab as pl
import numpy as np
from sklearn.cluster import KMeans
In [2]:
cols = ['area', 'perimeter', 'compactness', 'length', 'width', 'asymmetry', 'groove', '?']
seeds = pd.read_csv('seeds.csv', header=None, names = cols, index_col=None)
In [3]:
seeds.head()
Out[3]:
    area  perimeter  compactness  length  width  asymmetry  groove  ?
0  15.26      14.84       0.8710   5.763  3.312      2.221   5.220  1
1  14.88      14.57       0.8811   5.554  3.333      1.018   4.956  1
2  14.29      14.09       0.9050   5.291  3.337      2.699   4.825  1
3  13.84      13.94       0.8955   5.324  3.379      2.259   4.805  1
4  16.14      14.99       0.9034   5.658  3.562      1.355   5.175  1
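Assuming seeds.csv is the UCI seeds dataset (a guess from the column names, with the '?' column holding the variety label), a couple of quick checks confirm what was loaded before clustering; a minimal sketch:

print(seeds.shape)                 # number of rows and columns
print(seeds['?'].value_counts())   # samples per variety in the unlabeled last column
print(seeds.dtypes)                # every feature column should be numeric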
In [5]:
x = seeds.asymmetry.values
y = seeds.groove.values
In [6]:
pl.scatter(x,y, s=100)
Out[6]:
<matplotlib.collections.PathCollection at 0x151d67b8>
In [9]:
X = np.array([x, y]).T
In [10]:
X
Out[10]:
array([[ 2.221 , 5.22 ],
[ 1.018 , 4.956 ],
[ 2.699 , 4.825 ],
[ 2.259 , 4.805 ],
[ 1.355 , 5.175 ],
[ 2.462 , 4.956 ],
[ 3.586 , 5.219 ],
[ 2.7 , 5. ],
[ 2.04 , 5.877 ],
[ 1.969 , 5.533 ],
[ 4.543 , 5.314 ],
[ 1.717 , 5.001 ],
[ 3.986 , 4.738 ],
[ 3.136 , 4.872 ],
[ 2.932 , 4.825 ],
[ 4.185 , 4.781 ],
[ 5.234 , 4.781 ],
[ 1.599 , 5.046 ],
[ 1.767 , 4.649 ],
[ 4.102 , 4.914 ],
[ 3.072 , 5.176 ],
[ 2.688 , 5.219 ],
[ 0.7651, 5.091 ],
[ 1.415 , 4.961 ],
[ 1.791 , 5.001 ],
[ 0.903 , 5.307 ],
[ 3.373 , 4.825 ],
[ 2.504 , 4.869 ],
[ 2.754 , 5.038 ],
[ 3.531 , 5.097 ],
[ 0.8551, 5.056 ],
[ 3.412 , 5.228 ],
[ 3.92 , 5.299 ],
[ 2.124 , 5.012 ],
[ 2.129 , 5.36 ],
[ 2.27 , 5.443 ],
[ 2.823 , 5.527 ],
[ 2.956 , 5.484 ],
[ 3.112 , 5.309 ],
[ 6.685 , 5.001 ],
[ 2.587 , 5.178 ],
[ 2.249 , 5.176 ],
[ 2.461 , 4.783 ],
[ 4.711 , 5.528 ],
[ 3.128 , 5.18 ],
[ 1.56 , 4.961 ],
[ 1.367 , 5.132 ],
[ 2.958 , 5.175 ],
[ 2.704 , 5.111 ],
[ 2.129 , 5.351 ],
[ 3.975 , 5.144 ],
[ 5.593 , 5.136 ],
[ 4.116 , 5.396 ],
[ 3.328 , 5.224 ],
[ 1.481 , 5.487 ],
[ 1.933 , 5.439 ],
[ 2.802 , 5.044 ],
[ 1.142 , 5.088 ],
[ 1.999 , 5.222 ],
[ 1.502 , 4.519 ],
[ 2.7 , 4.607 ],
[ 2.269 , 4.703 ],
[ 3.22 , 4.605 ],
[ 4.157 , 5.088 ],
[ 1.176 , 4.782 ],
[ 2.352 , 4.607 ],
[ 1.313 , 5.15 ],
[ 2.217 , 5.132 ],
[ 1.464 , 5.3 ],
[ 3.533 , 5.067 ],
[ 4.076 , 6.06 ],
[ 4.675 , 5.877 ],
[ 4.539 , 5.791 ],
[ 2.936 , 6.079 ],
[ 4.004 , 5.841 ],
[ 4.92 , 5.795 ],
[ 3.824 , 5.922 ],
[ 4.451 , 6.451 ],
[ 5.064 , 6.362 ],
[ 2.858 , 5.746 ],
[ 5.532 , 5.88 ],
[ 5.324 , 5.879 ],
[ 5.173 , 6.187 ],
[ 1.472 , 6.273 ],
[ 2.962 , 6.185 ],
[ 2.443 , 6.197 ],
[ 1.649 , 6.109 ],
[ 3.691 , 6.498 ],
[ 5.78 , 6.231 ],
[ 5.016 , 6.321 ],
[ 1.955 , 6.449 ],
[ 3.12 , 6.053 ],
[ 3.237 , 6.053 ],
[ 6.001 , 5.877 ],
[ 4.933 , 6.448 ],
[ 3.696 , 5.967 ],
[ 3.477 , 6.238 ],
[ 2.144 , 6.453 ],
[ 2.853 , 6.273 ],
[ 2.188 , 6.097 ],
[ 4.217 , 5.618 ],
[ 2.068 , 5.837 ],
[ 4.308 , 6.009 ],
[ 3.357 , 6.229 ],
[ 3.368 , 6.148 ],
[ 2.553 , 5.879 ],
[ 2.843 , 6.2 ],
[ 3.747 , 5.929 ],
[ 3.252 , 6.55 ],
[ 1.738 , 5.894 ],
[ 2.235 , 5.794 ],
[ 3.678 , 5.965 ],
[ 2.109 , 5.924 ],
[ 6.682 , 6.053 ],
[ 4.677 , 6.316 ],
[ 2.248 , 6.163 ],
[ 4.334 , 5.75 ],
[ 3.084 , 6.185 ],
[ 3.639 , 5.966 ],
[ 3.063 , 6.32 ],
[ 5.901 , 6.188 ],
[ 3.619 , 6.011 ],
[ 4.286 , 5.703 ],
[ 2.984 , 5.905 ],
[ 3.336 , 5.144 ],
[ 4.188 , 5.992 ],
[ 4.391 , 6.102 ],
[ 2.257 , 5.919 ],
[ 1.91 , 6.185 ],
[ 5.366 , 5.661 ],
[ 2.837 , 5.962 ],
[ 2.908 , 5.949 ],
[ 4.462 , 5.795 ],
[ 4.266 , 5.795 ],
[ 4.972 , 5.847 ],
[ 3.6 , 5.439 ],
[ 3.526 , 5.971 ],
[ 2.64 , 5.879 ],
[ 2.725 , 5.752 ],
[ 3.769 , 5.922 ],
[ 5.304 , 5.395 ],
[ 7.035 , 5.44 ],
[ 5.995 , 5.307 ],
[ 5.469 , 5.221 ],
[ 4.471 , 5.178 ],
[ 6.169 , 5.275 ],
[ 2.221 , 5.132 ],
[ 4.421 , 5.002 ],
[ 3.26 , 5.316 ],
[ 5.462 , 5.194 ],
[ 5.195 , 5.307 ],
[ 6.992 , 5.27 ],
[ 4.756 , 5.36 ],
[ 3.332 , 5.001 ],
[ 4.048 , 5.263 ],
[ 5.813 , 5.219 ],
[ 3.347 , 5.003 ],
[ 4.825 , 5.22 ],
[ 4.378 , 5.31 ],
[ 5.388 , 5.31 ],
[ 3.082 , 5.491 ],
[ 4.271 , 5.308 ],
[ 4.988 , 5.046 ],
[ 4.419 , 5.176 ],
[ 6.388 , 5.049 ],
[ 2.201 , 5.056 ],
[ 4.924 , 5.27 ],
[ 3.638 , 5.338 ],
[ 4.337 , 5.132 ],
[ 3.521 , 5.088 ],
[ 6.735 , 5.163 ],
[ 6.715 , 4.956 ],
[ 4.309 , 5. ],
[ 5.588 , 5.089 ],
[ 5.182 , 5.185 ],
[ 4.773 , 5.063 ],
[ 5.335 , 5.092 ],
[ 4.702 , 4.963 ],
[ 5.876 , 5.002 ],
[ 1.661 , 5.178 ],
[ 4.957 , 4.825 ],
[ 4.987 , 5.147 ],
[ 4.857 , 5.158 ],
[ 5.209 , 5.135 ],
[ 6.185 , 5.316 ],
[ 4.062 , 5.182 ],
[ 4.898 , 5.352 ],
[ 4.179 , 4.956 ],
[ 7.524 , 4.957 ],
[ 4.975 , 4.794 ],
[ 5.398 , 5.045 ],
[ 3.985 , 5.001 ],
[ 3.597 , 5.132 ],
[ 4.853 , 5.089 ],
[ 4.132 , 5.012 ],
[ 4.873 , 4.914 ],
[ 5.483 , 4.958 ],
[ 4.67 , 5.091 ],
[ 3.306 , 5.231 ],
[ 2.828 , 4.83 ],
[ 5.472 , 5.045 ],
[ 2.3 , 4.745 ],
[ 4.051 , 4.828 ],
[ 8.456 , 5. ],
[ 3.919 , 5.001 ],
[ 3.631 , 4.87 ],
[ 4.325 , 5.003 ],
[ 8.315 , 5.056 ],
[ 3.598 , 5.044 ],
[ 5.637 , 5.063 ]])
In [11]:
km = KMeans(n_clusters = 2, n_init=1000)
In [12]:
km.fit(X)
Out[12]:
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=2, n_init=1000,
n_jobs=1, precompute_distances=True, random_state=None, tol=0.0001,
verbose=0)
In [13]:
print km.inertia_
204.241183065
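The inertia printed above is the within-cluster sum of squared distances, so it always shrinks as more clusters are added. Sweeping k and plotting the inertia gives the usual elbow curve for picking k; a minimal sketch using the same X:

# Sketch: elbow plot -- fit k-means for several k and record the inertia.
from sklearn.cluster import KMeans
inertias = []
ks = range(1, 11)
for k in ks:
    model = KMeans(n_clusters=k, n_init=10)
    model.fit(X)
    inertias.append(model.inertia_)
pl.plot(list(ks), inertias, 'o-')   # look for the bend where the curve flattens
pl.xlabel('number of clusters k')
pl.ylabel('inertia')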
In [14]:
pl.scatter(X[:,0], X[:,1], c=km.labels_, s=100)
Out[14]:
<matplotlib.collections.PathCollection at 0x1548c048>
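The fitted model also exposes cluster_centers_; overlaying the centroids makes the two-cluster split easier to read. A sketch that redraws the scatter above (marker and color choices are arbitrary):

# Sketch: labelled scatter plus the two fitted centroids.
pl.scatter(X[:, 0], X[:, 1], c=km.labels_, s=100)
pl.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
           marker='x', s=200, c='red')
pl.xlabel('asymmetry')
pl.ylabel('groove')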
In [30]:
km = KMeans(n_clusters = 15, n_init=10000)
km.fit(X)
print km.inertia_
pl.scatter(X[:,0], X[:,1], c=km.labels_, s=50)
17.8882682098
Out[30]:
<matplotlib.collections.PathCollection at 0x279ba080>
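With k=15 the inertia drops from about 204 to about 18, but that alone proves nothing: inertia keeps falling as k grows. The silhouette score is one common complement that does not reward over-splitting; a minimal sketch (the candidate k values are arbitrary):

# Sketch: mean silhouette score for a few candidate k values (higher is better).
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
for k in (2, 3, 5, 10, 15):
    model = KMeans(n_clusters=k, n_init=10)
    labels = model.fit_predict(X)
    print('k=%d  silhouette=%.3f' % (k, silhouette_score(X, labels)))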
In [31]:
from sklearn.preprocessing import MinMaxScaler
In [35]:
scale = MinMaxScaler()  # rescales each feature to the [0, 1] range
X_std = scale.fit_transform(X)
In [37]:
X_std
Out[37]:
array([[ 0.18930164, 0.34515017],
[ 0.03288302, 0.21516494],
[ 0.25145302, 0.1506647 ],
[ 0.19424255, 0.14081733],
[ 0.07670104, 0.3229936 ],
[ 0.22063738, 0.21516494],
[ 0.36678412, 0.3446578 ],
[ 0.25158304, 0.23682915],
[ 0.16576734, 0.66863614],
[ 0.15653565, 0.49926145],
[ 0.49121689, 0.39143279],
[ 0.12376965, 0.23732152],
[ 0.41879364, 0.10782866],
[ 0.30827341, 0.17380601],
[ 0.28174856, 0.1506647 ],
[ 0.44466837, 0.12900049],
[ 0.58106333, 0.12900049],
[ 0.10842684, 0.25947809],
[ 0.13027084, 0.06400788],
[ 0.4338764 , 0.19448548],
[ 0.29995189, 0.32348597],
[ 0.25002275, 0.3446578 ],
[ 0. , 0.28163466],
[ 0.08450246, 0.21762678],
[ 0.13339141, 0.23732152],
[ 0.01793028, 0.38798621],
[ 0.33908905, 0.1506647 ],
[ 0.22609838, 0.1723289 ],
[ 0.25860432, 0.25553914],
[ 0.35963281, 0.28458887],
[ 0.01170214, 0.26440177],
[ 0.34415998, 0.34908912],
[ 0.41021207, 0.38404727],
[ 0.17668933, 0.24273757],
[ 0.17733945, 0.41408173],
[ 0.19567281, 0.4549483 ],
[ 0.26757597, 0.49630724],
[ 0.28486913, 0.4751354 ],
[ 0.30515284, 0.38897095],
[ 0.76972786, 0.23732152],
[ 0.23689035, 0.3244707 ],
[ 0.19294231, 0.32348597],
[ 0.22050735, 0.12998523],
[ 0.51306089, 0.49679961],
[ 0.30723322, 0.32545544],
[ 0.10335591, 0.21762678],
[ 0.07826132, 0.30182176],
[ 0.28512918, 0.3229936 ],
[ 0.25210313, 0.29148203],
[ 0.17733945, 0.40965042],
[ 0.41736338, 0.30773018],
[ 0.62774188, 0.30379124],
[ 0.43569673, 0.43180699],
[ 0.33323798, 0.34711965],
[ 0.09308403, 0.47661251],
[ 0.15185479, 0.45297883],
[ 0.26484547, 0.25849335],
[ 0.04900597, 0.28015756],
[ 0.16043636, 0.34613491],
[ 0.09581453, 0. ],
[ 0.25158304, 0.04332841],
[ 0.19554278, 0.09059577],
[ 0.31919541, 0.04234367],
[ 0.44102771, 0.28015756],
[ 0.05342678, 0.12949286],
[ 0.20633476, 0.04332841],
[ 0.07124004, 0.31068439],
[ 0.18878155, 0.30182176],
[ 0.09087363, 0.38453964],
[ 0.35989286, 0.26981782],
[ 0.43049578, 0.75873954],
[ 0.50838003, 0.66863614],
[ 0.4906968 , 0.62629247],
[ 0.28226866, 0.76809453],
[ 0.42113407, 0.65091088],
[ 0.54023586, 0.62826194],
[ 0.39772978, 0.69079271],
[ 0.4792547 , 0.95125554],
[ 0.55895929, 0.90743476],
[ 0.2721268 , 0.60413589],
[ 0.61981043, 0.67011324],
[ 0.59276548, 0.66962088],
[ 0.57313188, 0.82127031],
[ 0.09191382, 0.86361398],
[ 0.28564927, 0.82028557],
[ 0.21816692, 0.82619399],
[ 0.11492803, 0.78286558],
[ 0.38043662, 0.97439685],
[ 0.65205633, 0.84293452],
[ 0.55271815, 0.88724766],
[ 0.15471531, 0.9502708 ],
[ 0.30619303, 0.75529296],
[ 0.32140582, 0.75529296],
[ 0.68079158, 0.66863614],
[ 0.54192617, 0.94977843],
[ 0.38108674, 0.71294929],
[ 0.35261153, 0.84638109],
[ 0.17928981, 0.95224028],
[ 0.27147668, 0.86361398],
[ 0.18501086, 0.77695716],
[ 0.44882914, 0.54111275],
[ 0.169408 , 0.64894141],
[ 0.4606613 , 0.73362875],
[ 0.33700867, 0.84194978],
[ 0.33843893, 0.80206795],
[ 0.23246954, 0.66962088],
[ 0.27017644, 0.8276711 ],
[ 0.38771795, 0.69423929],
[ 0.32335617, 1. ],
[ 0.12650015, 0.6770064 ],
[ 0.19112198, 0.62776957],
[ 0.37874631, 0.71196455],
[ 0.17473898, 0.69177745],
[ 0.76933779, 0.75529296],
[ 0.50864008, 0.88478582],
[ 0.19281228, 0.80945347],
[ 0.46404192, 0.60610537],
[ 0.30151218, 0.82028557],
[ 0.37367538, 0.71245692],
[ 0.29878168, 0.88675529],
[ 0.66778921, 0.82176268],
[ 0.37107491, 0.73461349],
[ 0.45780078, 0.58296406],
[ 0.2885098 , 0.68242245],
[ 0.33427817, 0.30773018],
[ 0.44505845, 0.72525849],
[ 0.47145328, 0.77941901],
[ 0.1939825 , 0.68931561],
[ 0.14886424, 0.82028557],
[ 0.59822648, 0.56228459],
[ 0.2693963 , 0.71048744],
[ 0.27862799, 0.70408666],
[ 0.48068497, 0.62826194],
[ 0.4552003 , 0.62826194],
[ 0.5469971 , 0.65386509],
[ 0.36860445, 0.45297883],
[ 0.35898269, 0.71491876],
[ 0.24378161, 0.66962088],
[ 0.25483363, 0.6070901 ],
[ 0.39057848, 0.69079271],
[ 0.590165 , 0.43131462],
[ 0.81523619, 0.4534712 ],
[ 0.68001144, 0.38798621],
[ 0.61161893, 0.34564254],
[ 0.48185518, 0.3244707 ],
[ 0.70263558, 0.37223043],
[ 0.18930164, 0.30182176],
[ 0.47535399, 0.23781388],
[ 0.32439636, 0.39241753],
[ 0.61070876, 0.3323486 ],
[ 0.57599241, 0.38798621],
[ 0.80964517, 0.36976859],
[ 0.51891196, 0.41408173],
[ 0.33375808, 0.23732152],
[ 0.42685511, 0.36632201],
[ 0.65634711, 0.3446578 ],
[ 0.33570843, 0.23830625],
[ 0.5278836 , 0.34515017],
[ 0.46976297, 0.38946332],
[ 0.601087 , 0.38946332],
[ 0.30125213, 0.47858198],
[ 0.45585042, 0.38847858],
[ 0.54907748, 0.25947809],
[ 0.47509394, 0.32348597],
[ 0.73111079, 0.26095519],
[ 0.18670117, 0.26440177],
[ 0.54075596, 0.36976859],
[ 0.37354536, 0.40324963],
[ 0.46443199, 0.30182176],
[ 0.35833257, 0.28015756],
[ 0.77622905, 0.31708518],
[ 0.77362857, 0.21516494],
[ 0.46079132, 0.23682915],
[ 0.62709176, 0.28064993],
[ 0.5743021 , 0.32791728],
[ 0.52112237, 0.26784835],
[ 0.59419574, 0.28212703],
[ 0.51189068, 0.21861152],
[ 0.66453861, 0.23781388],
[ 0.11648832, 0.3244707 ],
[ 0.54504674, 0.1506647 ],
[ 0.54894746, 0.30920729],
[ 0.53204436, 0.31462334],
[ 0.57781274, 0.30329887],
[ 0.70471596, 0.39241753],
[ 0.42867545, 0.32644018],
[ 0.53737534, 0.41014279],
[ 0.44388823, 0.21516494],
[ 0.87881782, 0.21565731],
[ 0.54738717, 0.13540128],
[ 0.60238724, 0.25898572],
[ 0.41866362, 0.23732152],
[ 0.36821438, 0.30182176],
[ 0.53152427, 0.28064993],
[ 0.43777711, 0.24273757],
[ 0.53412474, 0.19448548],
[ 0.61343926, 0.21614968],
[ 0.50772991, 0.28163466],
[ 0.33037746, 0.35056622],
[ 0.26822609, 0.15312654],
[ 0.612009 , 0.25898572],
[ 0.19957352, 0.11127523],
[ 0.42724519, 0.1521418 ],
[ 1. , 0.23682915],
[ 0.41008205, 0.23732152],
[ 0.37263519, 0.17282127],
[ 0.46287171, 0.23830625],
[ 0.98166664, 0.26440177],
[ 0.36834441, 0.25849335],
[ 0.63346292, 0.26784835]])
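MinMaxScaler maps each column to the [0, 1] range, which puts both features on a comparable footing. StandardScaler (zero mean, unit variance per column) is the other common choice; a small sketch to verify the scaling and show the alternative:

# Sketch: check the min-max scaling, then standardize as a comparison.
from sklearn.preprocessing import StandardScaler
print(X_std.min(axis=0))   # should be [0, 0]
print(X_std.max(axis=0))   # should be [1, 1]
X_z = StandardScaler().fit_transform(X)
print(X_z.mean(axis=0))    # approximately [0, 0]
print(X_z.std(axis=0))     # approximately [1, 1]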
In [40]:
km = KMeans(n_clusters = 2, n_init=1000)
km.fit(X_std)
print km.inertia_
pl.scatter(X[:,0], X[:,1], c=km.labels_, s=50)  # original coordinates, labels learned on X_std
10.279566141
Out[40]:
<matplotlib.collections.PathCollection at 0x275f2f60>
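Because the last column of the frame ('?') holds the true variety label, the clustering found on the scaled data can be checked against it. A sketch using the adjusted Rand index and a contingency table (km here is the k=2 model fitted on X_std):

# Sketch: compare learned cluster labels with the true labels.
from sklearn.metrics import adjusted_rand_score
truth = seeds['?'].values
print(adjusted_rand_score(truth, km.labels_))   # 1.0 = perfect agreement, ~0 = random
print(pd.crosstab(truth, km.labels_))           # rows: true variety, cols: cluster id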
In [ ]:
Content source: timaeudg/CSSE490-DataMining