In [ ]:
import numpy as np
from sklearn.datasets import load_iris, load_digits
from sklearn.metrics import f1_score
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
In [ ]:
# Load the Iris dataset bundle and unpack its feature matrix (X) and
# class-label vector (y) for the cells that follow.
iris = load_iris()
X, y = iris.data, iris.target
In [ ]:
# Show the dimensions of the feature matrix X.
print(X.shape)
In [ ]:
# Reduce the features to two principal components so the samples can be
# drawn on a 2-D scatter plot.
pca = PCA(n_components=2)
# NOTE: X is overwritten with the projected data; the original features are
# no longer available under this name after this cell runs.
X = pca.fit_transform(X)
In [ ]:
# Cluster the 2-D data with KMeans using the estimator's default number of
# clusters, then plot every sample coloured by its cluster and mark each
# centroid with an "x".
# random_state is fixed so the cluster ids (and therefore the plot colours)
# are reproducible between runs; unseeded KMeans may label differently each
# time the cell is executed.
km = KMeans(random_state=0)
km.fit(X)
# The training assignments are already stored on the fitted estimator, so
# there is no need for a second pass over the same data with predict().
clusters = km.labels_
plt.scatter(X[:, 0], X[:, 1], c=clusters, alpha=0.5)
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
            c=np.arange(km.n_clusters), marker='x', s=150, linewidth=3)
In [ ]:
# Repeat the clustering with exactly three clusters and plot the samples
# coloured by cluster, with each centroid marked by an "x".
# random_state is fixed so the cluster ids are reproducible between runs;
# unseeded KMeans may number the clusters differently on every execution.
km = KMeans(n_clusters=3, random_state=0)
km.fit(X)
# The training assignments are already stored on the fitted estimator, so
# there is no need for a second pass over the same data with predict().
clusters = km.labels_
plt.scatter(X[:, 0], X[:, 1], c=clusters, alpha=0.5)
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
            c=np.arange(km.n_clusters), marker='x', s=150, linewidth=3)
In [ ]:
# Score the raw cluster ids against the true labels.
# y is a multiclass target, so an averaging mode must be given explicitly:
# f1_score's default average='binary' raises ValueError for multiclass input.
# NOTE: cluster ids are arbitrary integers, so this score only becomes
# meaningful once the ids have been aligned with the class labels.
print("Clustering F1 Score: %f" % f1_score(y, clusters, average='weighted'))
In [ ]:
# Print the ground-truth class label of every sample.
print(y)
In [ ]:
# Print the cluster id KMeans assigned to every sample, for comparison with
# the true labels.
print(clusters)
In [ ]:
# Translate the arbitrary KMeans cluster ids into class labels so they can be
# compared with y.
# The id given to each cluster depends on the (random) initialisation, so a
# hard-coded permutation is only correct for one particular run. Instead,
# each cluster is relabelled with the most frequent true class among its
# members.
# NOTE: majority voting does not force a one-to-one mapping; two clusters may
# receive the same label if they are dominated by the same class.
c_mapped = np.empty_like(clusters)
for cluster_id in np.unique(clusters):
    members = clusters == cluster_id
    # bincount tallies the non-negative integer class labels; argmax picks
    # the class that occurs most often inside this cluster.
    c_mapped[members] = np.bincount(y[members]).argmax()
In [ ]:
# Score the relabelled clusters against the true labels.
# y is a multiclass target, so an averaging mode must be given explicitly:
# f1_score's default average='binary' raises ValueError for multiclass input.
print("Clustering F1 Score: %f" % f1_score(y, c_mapped, average='weighted'))
In [ ]:
# Load SciPy's sample raccoon photograph in grayscale and show it next to the
# normalised histogram of its pixel intensities.
try:
    # scipy.misc.face was deprecated and then removed from SciPy; newer
    # releases provide the sample image in scipy.datasets (which downloads it
    # via the optional "pooch" package).
    from scipy.datasets import face
except ImportError:
    # Older SciPy releases without scipy.datasets.
    from scipy.misc import face
racoon = face(gray=True)
fig, ax = plt.subplots(nrows=1, ncols=2)
ax[0].imshow(racoon, cmap=plt.cm.gray)
# Hide the axis ticks; they carry no information for an image.
ax[0].set_xticks([])
ax[0].set_yticks([])
# `density=True` replaces the `normed` keyword, which Matplotlib removed.
_ = ax[1].hist(racoon.reshape(-1, 1), bins=256,
               density=True, color='.5', edgecolor='.5')
plt.tight_layout()
In [ ]:
# Compress the raccoon image to five grey levels: cluster the pixel
# intensities with KMeans, replace every pixel by the centre of its cluster,
# and show the result next to its histogram.
# Each pixel becomes one single-feature sample.
X = racoon.reshape(-1, 1)
# random_state is fixed so the chosen grey levels are reproducible.
km = KMeans(n_clusters=5, random_state=0)
km.fit(X)
values = km.cluster_centers_.ravel()
labels = km.labels_
# Look up each pixel's cluster centre by fancy indexing. Unlike np.choose,
# this has no upper limit on the number of clusters. reshape() restores the
# image layout without assigning to .shape in place.
rac_compressed = values[labels].reshape(racoon.shape)
fig, ax = plt.subplots(nrows=1, ncols=2)
ax[0].imshow(rac_compressed, cmap=plt.cm.gray)
# Hide the axis ticks; they carry no information for an image.
ax[0].set_xticks([])
ax[0].set_yticks([])
# `density=True` replaces the `normed` keyword, which Matplotlib removed.
_ = ax[1].hist(rac_compressed.reshape(-1, 1), bins=256,
               density=True, color='.5', edgecolor='.5')
plt.tight_layout()
In [ ]:
# Load the digits dataset bundle and unpack its feature matrix (X) and
# class-label vector (y), replacing the data used in the earlier cells.
digits = load_digits()
X, y = digits.data, digits.target
In [ ]:
# enter code here