notebook.community

Edit and run



In [5]:

    
%pylab inline
import sklearn
import sklearn.linear_model
from sklearn.cluster import KMeans









    



Populating the interactive namespace from numpy and matplotlib



In [74]:

    
# Load the wine data: column 0 is used as the class label, every other
# column as a feature.
data = genfromtxt('wine.dat', delimiter=',')
X = data[:, 1:]
Y = data[:, 0]

# Number of cluster counts to try; one accuracy value is stored per trial.
n = 30
score = zeros(n)

# Sweep the number of clusters over a log-spaced grid (int(i + 1) runs from
# 2 up to 101) and record how well the cluster assignment alone predicts Y.
for j, i in enumerate(logspace(0, 2, n)):
    # Fixed seed so that re-running the cell reproduces the same clustering.
    Kclusters = KMeans(n_clusters=int(i + 1), random_state=0)
    Kclusters.fit(X)
    groups = Kclusters.predict(X)

    # The classifier's only input is the cluster index, as a column vector.
    # NOTE(review): cluster ids are arbitrary labels but are fed in here as a
    # single numeric feature; one-hot encoding may be more appropriate.
    group_features = groups.reshape(-1, 1)

    conversion = sklearn.linear_model.LogisticRegression()
    conversion.fit(group_features, Y)
    predicted = conversion.predict(group_features)

    # Score on the same features the model was trained on. Passing the
    # predicted labels as features (as before) measures nothing meaningful.
    score[j] = conversion.score(group_features, Y)



In [71]:

    
# Draw the per-trial accuracy values against their trial index.
plt.plot(score)









    Out[71]:





[<matplotlib.lines.Line2D at 0x7f4edbfb5890>]



In [26]:

    
# Dump the cluster assignments, the classifier's predictions and the true
# labels for visual comparison. The call form of print works on both
# Python 2 and Python 3 when given a single argument.
print(groups)
print(predicted)
print(Y)









    



[1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 2 1 1 2 1 1 1 1 1 1 2 2
 1 1 2 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 2 0 2 0 0 2 0 0 2 2 2 0 0 1
 2 0 0 0 2 0 0 2 2 0 0 0 0 0 2 2 0 0 0 0 0 2 2 0 2 0 2 0 0 0 2 0 0 0 0 2 0
 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 2 2 2 2 0 0 0 2 2 0 0 2 2 0 2
 2 0 0 0 0 2 2 2 0 2 2 2 0 2 0 2 2 0 2 2 2 2 0 0 2 2 2 2 2 0]
[ 2.  2.  2.  2.  1.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  2.  1.  1.  1.  2.  2.  1.  1.  2.  2.  1.  2.  2.  2.  2.  2.  2.  1.
  1.  2.  2.  1.  1.  2.  2.  1.  1.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  2.  2.  2.  2.  2.  2.  1.  2.  1.  2.  2.  1.  2.  2.  1.  1.  1.  2.
  2.  2.  1.  2.  2.  2.  1.  2.  2.  1.  1.  2.  2.  2.  2.  2.  1.  1.
  2.  2.  2.  2.  2.  1.  1.  2.  1.  2.  1.  2.  2.  2.  1.  2.  2.  2.
  2.  1.  2.  2.  1.  2.  2.  2.  2.  2.  2.  2.  1.  2.  2.  2.  2.  2.
  2.  2.  2.  2.  1.  2.  2.  1.  1.  1.  1.  2.  2.  2.  1.  1.  2.  2.
  1.  1.  2.  1.  1.  2.  2.  2.  2.  1.  1.  1.  2.  1.  1.  1.  2.  1.
  2.  1.  1.  2.  1.  1.  1.  1.  2.  2.  1.  1.  1.  1.  1.  2.]
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.  2.
  2.  2.  2.  2.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.
  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.
  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.  3.]



In [10]:

    
shape(groups)









    Out[10]:





(178,)



In [32]:

    
from sklearn import linear_model, datasets

# Load the iris dataset, keeping only the first two feature columns as X
# and the class targets as Y.
iris = datasets.load_iris()
X, Y = iris.data[:, :2], iris.target



In [34]:









    



[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 1 2 1 2 1 2 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1
 2 2 2 2 1 1 1 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 1 2 1 1 2 1 2 2 2 2 1 2 2 2 2
 2 2 1 1 2 2 2 2 1 2 1 2 1 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 2 2 1 2
 2 1]



In [ ]: