In [1]:
%pylab inline  # assumed: the bare figsize/meshgrid/contourf/scatter calls below come from the pylab namespace
from sklearn.datasets import make_moons, make_circles, make_classification

# A linearly separable problem with some uniform noise added on top
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)

# Three toy 2-D binary classification problems
datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            (X, y)]
figsize(14, 5)
In [2]:
def plot_classification(name, clf, X, y, cmap):
    score = clf.score(X, y)
    h = 0.2  # step size of the mesh
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = meshgrid(arange(x_min, x_max, h), arange(y_min, y_max, h))
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    if hasattr(clf, "decision_function"):
        Z = clf.decision_function(c_[xx.ravel(), yy.ravel()])
    else:
        Z = clf.predict_proba(c_[xx.ravel(), yy.ravel()])[:, 1]
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    contourf(xx, yy, Z, cmap=cmap, alpha=.8)
    scatter(X[:, 0], X[:, 1], c=y, cmap=cm.Greys)
    xlim(xx.min(), xx.max())
    ylim(yy.min(), yy.max())
    title(name + " - Score %.2f" % score)
In [3]:
def plot_multi_class(name, clf, X, y, cmap=cm.PRGn):
    score = clf.score(X, y)
    h = 0.2  # step size of the mesh
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = meshgrid(arange(x_min, x_max, h), arange(y_min, y_max, h))
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    Z = clf.predict(c_[xx.ravel(), yy.ravel()])
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    contourf(xx, yy, Z, cmap=cmap, alpha=.8)
    scatter(X[:, 0], X[:, 1], c=y, cmap=cm.Greys)
    xlim(xx.min(), xx.max())
    ylim(yy.min(), yy.max())
    title(name + " - Score %.2f" % score)
In [4]:
figsize(14, 5)
for i, (X, y) in enumerate(datasets):
    subplot(1, 3, i + 1)
    scatter(X[:, 0], X[:, 1], c=y, cmap=cm.Greys)
We would like to build a classifier that can properly separate the two classes and correctly classify new inputs.
In [5]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
In [6]:
X, y = datasets[0]  # the moons dataset
knn.fit(X, y)
y_e = knn.predict(X)  # predictions on the training points themselves
In [7]:
figsize(8,8)
plot_classification('K Neighbors', knn, X, y, cm.PRGn)
In [8]:
figsize(15, 5)
for dataset_idx, (X, y) in enumerate(datasets):
    subplot(1, 3, dataset_idx + 1)
    knn.fit(X, y)
    plot_classification('K Neighbors', knn, X, y, cm.PRGn)
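Note that plot_classification reports the score on the same points used to fit the model, which is optimistic. A minimal sketch of a more honest check on held-out data, assuming a scikit-learn version that provides sklearn.model_selection:
In [ ]:
from sklearn.model_selection import train_test_split

for X, y in datasets:
    # Hold out 40% of the points; the model never sees them during fitting
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42)
    knn.fit(X_train, y_train)
    print("train %.2f / test %.2f" % (knn.score(X_train, y_train),
                                      knn.score(X_test, y_test)))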
In [9]:
from sklearn.svm import SVC
svc = SVC(kernel='linear')
X, y = datasets[0]
svc.fit(X, y)
In [10]:
figsize(8,8)
plot_classification('SVC linear', svc, X, y, cm.PRGn)
In [11]:
figsize(15, 5)
for dataset_idx, (X, y) in enumerate(datasets):
    subplot(1, 3, dataset_idx + 1)
    svc.fit(X, y)
    plot_classification('SVC linear', svc, X, y, cm.PRGn)
In [12]:
svc = SVC(kernel='poly', degree=3)
for dataset_idx, (X, y) in enumerate(datasets):
    subplot(1, 3, dataset_idx + 1)
    svc.fit(X, y)
    plot_classification('SVC Polynomial', svc, X, y, cm.PRGn)
In [13]:
svc = SVC(kernel='rbf')
for dataset_idx, (X, y) in enumerate(datasets):
    subplot(1, 3, dataset_idx + 1)
    svc.fit(X, y)
    plot_classification('SVC RBF', svc, X, y, cm.PRGn)
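How flexible the RBF boundary is depends on the kernel width gamma (and on the regularization parameter C). A minimal sketch of the effect on the moons dataset; the gamma values below are illustrative, not tuned:
In [ ]:
figsize(15, 5)
X, y = datasets[0]
for i, gamma in enumerate([0.1, 1.0, 10.0]):
    subplot(1, 3, i + 1)
    # Larger gamma -> narrower kernel -> a tighter, more wiggly boundary
    svc = SVC(kernel='rbf', gamma=gamma)
    svc.fit(X, y)
    plot_classification('SVC RBF, gamma=%g' % gamma, svc, X, y, cm.PRGn)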
In [14]:
from sklearn.datasets import load_iris
iris = load_iris()
In [15]:
print(iris.DESCR)
In [16]:
X = iris.data[:, 2:]  # keep only petal length and petal width
y = iris.target
In [17]:
figsize(8,8)
scatter(X[:,0], X[:,1], c=y)
Build a classifier able to separate the 3 plant species.
In [20]:
svc = SVC(kernel='rbf')
svc.fit(X, y)
In [21]:
figsize(8,8)
plot_multi_class('SVC - RBF', svc, X, y)
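As before, the score shown is measured on the training points. A short sketch of a cross-validated estimate for the same model, again assuming sklearn.model_selection is available:
In [ ]:
from sklearn.model_selection import cross_val_score

# Fit and score on 5 different train/test folds instead of the training set
scores = cross_val_score(SVC(kernel='rbf'), X, y, cv=5)
print("5-fold accuracy: %.2f +/- %.2f" % (scores.mean(), scores.std()))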