In [1]:
import sklearn as skt
In [34]:
import numpy as np
In [2]:
import pandas as pd
In [3]:
from sklearn import datasets
In [4]:
# Load scikit-learn's bundled Iris dataset; later cells read its
# `data` (feature matrix) and `target` (class labels) attributes.
dataset = datasets.load_iris()
In [5]:
# Summarise the dataset instead of dumping the whole Bunch (every sample,
# every label and the full description text) into the cell output.
print("data shape:", dataset.data.shape)
print("features:", list(dataset.feature_names))
print("classes:", list(dataset.target_names))
In [6]:
from sklearn.cluster import KMeans
In [7]:
# Fit a 2-cluster KMeans on the Iris features.
# random_state fixes the centroid initialisation; n_init is pinned explicitly
# because its default changed from 10 to 'auto' in scikit-learn 1.4, which
# would otherwise alter results (and raise a FutureWarning on 1.2/1.3).
kmeans = KMeans(n_clusters=2, random_state=0, n_init=10).fit(dataset.data)
In [8]:
# Show the cluster index KMeans assigned to each sample it was fitted on.
kmeans.labels_
Out[8]:
In [9]:
# Show the coordinates of the fitted cluster centres (one row per cluster).
kmeans.cluster_centers_
Out[9]:
In [10]:
import matplotlib.pyplot as plt
In [11]:
# Draw every feature column of the raw data as its own line,
# with the sample index on the x axis.
fig, ax = plt.subplots()
ax.plot(dataset.data)
plt.show()
In [12]:
# Plot the centroid coordinates: one line per feature (column), with the
# cluster index on the x axis.
# The array is passed directly: `ndarray.data` is the raw memory buffer
# (a memoryview), not the values, and should not be handed to matplotlib.
plt.plot(kmeans.cluster_centers_)
plt.show()
In [13]:
from sklearn.cluster import AgglomerativeClustering
In [14]:
# Agglomerative (hierarchical) clustering into two groups on the same
# features; `fit` returns the estimator itself, so the name is bound to
# the fitted model.
hierarchical_model = AgglomerativeClustering(n_clusters=2)
aglomerative = hierarchical_model.fit(dataset.data)
In [15]:
# Show the cluster index the agglomerative model assigned to each sample.
aglomerative.labels_
Out[15]:
In [16]:
from sklearn import metrics
In [26]:
# Short aliases used by the evaluation cells: X is the feature matrix,
# y holds the ground-truth class labels.
X, y = dataset.data, dataset.target
Métricas Homogeneity e Completeness
In [55]:
# Completeness of the KMeans clustering against the true classes.
# scikit-learn's signature is completeness_score(labels_true, labels_pred):
# the ground truth must come first, otherwise the value returned is
# actually the homogeneity.
print(" \t{0:.3}".format(metrics.completeness_score(y, kmeans.predict(X))))
In [57]:
# Completeness of the agglomerative clustering against the true classes.
# Ground truth goes first (labels_true, labels_pred); swapping the
# arguments would report the homogeneity instead.
print(" \t{0:.3}".format(metrics.completeness_score(y, aglomerative.labels_)))
In [19]:
from sklearn.metrics.cluster import homogeneity_score
In [58]:
# Homogeneity of the KMeans clustering against the true classes.
# scikit-learn's signature is homogeneity_score(labels_true, labels_pred):
# the ground truth must come first, otherwise the value returned is
# actually the completeness.
print(" \t{0:.3}".format(homogeneity_score(y, kmeans.predict(X))))
In [59]:
# Homogeneity of the agglomerative clustering against the true classes.
# Ground truth goes first (labels_true, labels_pred); swapping the
# arguments would report the completeness instead.
print(" \t{0:.3}".format(homogeneity_score(y, aglomerative.labels_)))
In [35]:
# Candidate cluster counts (1 through 9) for the elbow analysis.
nC = np.arange(1, 10)
# Will collect one inertia value per candidate count.
vetor = list()
In [37]:
# Elbow method: fit KMeans once per candidate cluster count and record the
# inertia (within-cluster sum of squared distances) of each fit.
#
# Each fit uses a fresh estimator that copies the settings of `kmeans`
# (only n_clusters differs), so the model fitted earlier is not mutated.
# The result list is rebuilt here so re-running the cell does not append
# duplicate entries.
base_params = kmeans.get_params()
vetor = []
for n in nC:
    model = KMeans(**dict(base_params, n_clusters=int(n)))
    vetor.append(model.fit(X).inertia_)
In [38]:
# Elbow curve: inertia as a function of the number of clusters.
fig, ax = plt.subplots()
ax.set(xlabel='Numero de clusters', ylabel='inertia')
ax.plot(nC, vetor)
Out[38]:
In [39]:
# Display any figure that is still open.
plt.show()
In [40]:
# Therefore the best number of clusters is 3: reconfigure the existing
# estimator and refit it on the full feature matrix.
kmeans.set_params(n_clusters=3).fit(X)
Out[40]:
In [52]:
# Score the current KMeans model against the true classes.
# The cluster assignments are predicted once and shared by both metrics.
cluster_labels = kmeans.predict(X)
homogenity = homogeneity_score(y, cluster_labels)
completeness = metrics.completeness_score(y, cluster_labels)
In [48]:
# Report the homogeneity score with three significant digits.
homogeneity_template = "Homogeneity: \t{0:.3}"
print(homogeneity_template.format(homogenity))
In [53]:
# Report the completeness score with three significant digits.
completeness_template = "Completness: \t{0:.3}"
print(completeness_template.format(completeness))
In [ ]: