Supervised Learning
Supervised learning consists in learning the link between two datasets: the observed data X and an external variable y that we are trying to predict.
All supervised estimators implement a $fit(X, y)$ method to fit the model and a $predict(X)$ method that, given unlabeled observations X, returns the predicted labels y, as demonstrated on the iris dataset below.
In [1]:
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
iris_X = iris.data
iris_y = iris.target
np.random.seed(0)
indices = np.random.permutation(len(iris_X))
iris_X_train = iris_X[indices[:-10]]
iris_y_train = iris_y[indices[:-10]]
iris_X_test = iris_X[indices[-10:]]
iris_y_test = iris_y[indices[-10:]]
# create and fit a nearest-neighbor classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)
Out[1]:
In [2]:
knn.predict(iris_X_test)
Out[2]:
In [3]:
iris_y_test
Out[3]:
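To summarize the comparison above in a single number, here is a minimal sketch reusing the fitted knn from above; score returns the mean accuracy on the given test data.
# mean accuracy of the nearest-neighbor classifier on the held-out samples
knn.score(iris_X_test, iris_y_test)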
In [9]:
diabetes = datasets.load_diabetes()
diabetes_X_train = diabetes.data[:-20]
diabetes_y_train = diabetes.target[:-20]
diabetes_X_test = diabetes.data[-20:]
diabetes_y_test = diabetes.target[-20:]
from sklearn import linear_model
regr = linear_model.LinearRegression()
regr.fit(diabetes_X_train, diabetes_y_train)
Out[9]:
In [10]:
regr.coef_
Out[10]:
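The fitted linear model has the form $y = X\beta + b$, with the estimated coefficients $\beta$ stored in coef_; the offset term is available as well (a small sketch for completeness):
# intercept (offset) of the fitted linear model
regr.intercept_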
In [11]:
# mean squared error of the predictions on the test set
np.mean((regr.predict(diabetes_X_test)-diabetes_y_test)**2)
Out[11]:
In [12]:
# coefficient of determination R^2: 1 is perfect prediction
regr.score(diabetes_X_test, diabetes_y_test)
Out[12]:
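score returns the coefficient of determination $R^2$ of the predictions. As a hedged sketch (the names y_pred, ss_res and ss_tot are only illustrative), the same quantity can be computed by hand from the test predictions:
# R^2 = 1 - residual sum of squares / total sum of squares
y_pred = regr.predict(diabetes_X_test)
ss_res = ((diabetes_y_test - y_pred) ** 2).sum()
ss_tot = ((diabetes_y_test - diabetes_y_test.mean()) ** 2).sum()
1 - ss_res / ss_tot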
In [14]:
# a tiny two-point dataset to illustrate the effect of noise on the fit
X = np.c_[0.5, 1].T
y = [0.5, 1]
test = np.c_[0, 2].T
regr = linear_model.LinearRegression()
import matplotlib.pyplot as plt
plt.figure()
np.random.seed(0)
for _ in range(6):
    this_X = 0.1 * np.random.normal(size=(2, 1)) + X
    regr.fit(this_X, y)
    plt.plot(test, regr.predict(test))
    plt.scatter(this_X, y, s=3)
plt.show()
In [15]:
regr = linear_model.Ridge(alpha=0.1)
plt.figure()
np.random.seed(0)
for _ in range(6):
    this_X = 0.1 * np.random.normal(size=(2, 1)) + X
    regr.fit(this_X, y)
    plt.plot(test, regr.predict(test))
    plt.scatter(this_X, y, s=3)
plt.show()
This is an example of the bias/variance tradeoff: the larger the ridge $\alpha$ parameter, the higher the bias and the lower the variance.
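In practice $\alpha$ can be chosen to maximize the left-out score; a minimal sketch on the diabetes split from above, sweeping a few candidate values (the grid np.logspace(-4, -1, 6) is only an illustrative choice):
# test-set R^2 of ridge regression for a few candidate values of alpha
alphas = np.logspace(-4, -1, 6)
ridge = linear_model.Ridge()
scores = [ridge.set_params(alpha=alpha)
               .fit(diabetes_X_train, diabetes_y_train)
               .score(diabetes_X_test, diabetes_y_test)
          for alpha in alphas]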
In [24]:
# take the first two features
X = iris.data[:,:2]
Y = iris.target
h = .02  # step size of the mesh used to plot the decision boundary
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(X,Y)
x_min, x_max = X[:, 0].min() - .5, X[:,0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4,3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.show()
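Besides hard class labels, the fitted logistic regression also exposes per-class probabilities through predict_proba; a quick sketch on the first few samples of the two-feature iris data used above:
# probability of each of the three iris classes for the first 5 samples
logreg.predict_proba(X[:5])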
In [29]:
from sklearn import svm
X = iris.data[:,:2]
y = iris.target
h = .02  # step size of the mesh
C = 1.0  # SVM regularization parameter
svc = svm.SVC(kernel='linear', C=C).fit(X, y)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, y)
poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X, y)
lin_svc = svm.LinearSVC(C=C).fit(X, y)
x_min, x_max = X[:, 0].min() - 1, X[:,0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
titles = ['SVC with linear kernel',
          'LinearSVC (linear kernel)',
          'SVC with RBF kernel',
          'SVC with polynomial (degree 3) kernel']
for i, clf in enumerate((svc, lin_svc, rbf_svc, poly_svc)):
    plt.subplot(2, 2, i + 1)
    plt.subplots_adjust(wspace=0.4, hspace=0.4)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i])
plt.show()
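After fitting, the kernelized SVC estimators expose the training points that define the margin and a signed distance to the decision boundary; a minimal sketch inspecting the linear-kernel svc fitted above:
# support vectors of the linear-kernel SVC, and the decision-function values
# (signed distances to the boundary) for the first few samples
svc.support_vectors_
svc.decision_function(X[:5])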