In [ ]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_digits, load_breast_cancer
from sklearn.svm import SVC
from sklearn.cross_validation import train_test_split, KFold, StratifiedKFold
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
In [ ]:
# Baseline: score a linear SVM on the digits data over five independent random
# train/test splits, printing the held-out accuracy of each run.
digits = load_digits()
X, y = digits.data, digits.target
clf = SVC(C=1.0, kernel="linear")
for run_no in range(1, 6):
    xtrain, xtest, ytrain, ytest = train_test_split(X, y)
    clf.fit(xtrain, ytrain)
    held_out_acc = clf.score(xtest, ytest)
    print("Iteration %d" % run_no, "Accuracy: %f" % held_out_acc)
In [ ]:
# Manual split 1: train on the first 1200 samples, score on the remainder.
xtrain, xtest = X[:1200, :], X[1200:, :]
ytrain, ytest = y[:1200], y[1200:]
print(clf.fit(xtrain, ytrain).score(xtest, ytest))
In [ ]:
# Manual split 2: hold out the first 600 samples, train on everything after them.
xtest, xtrain = X[:600, :], X[600:, :]
ytest, ytrain = y[:600], y[600:]
print(clf.fit(xtrain, ytrain).score(xtest, ytest))
In [ ]:
# Manual split 3: hold out the middle slice [600, 1200) and train on the
# samples before and after it, stacked along the sample axis.
xtest, ytest = X[600:1200, :], y[600:1200]
xtrain = np.concatenate((X[:600, :], X[1200:, :]), axis=0)
ytrain = np.concatenate((y[:600], y[1200:]))
print(clf.fit(xtrain, ytrain).score(xtest, ytest))
In [ ]:
from sklearn.model_selection import KFold

# `sklearn.cross_validation.KFold(n=..., n_folds=...)` was removed in
# scikit-learn 0.20. The model_selection splitter is configured with
# `n_splits` and produces indices through `.split(X)`. Materialising the
# splits keeps `kfold` a re-iterable sequence of (train_index, test_index)
# pairs, one per fold, with the shuffled assignment fixed once.
kfold = list(KFold(n_splits=6, shuffle=True).split(X))
In [ ]:
# Fit and score the classifier once per fold; every item of `kfold` is a
# (train indices, test indices) pair.
for train_rows, test_rows in kfold:
    xtrain, xtest = X[train_rows, :], X[test_rows, :]
    ytrain, ytest = y[train_rows], y[test_rows]
    print(clf.fit(xtrain, ytrain).score(xtest, ytest))
In [ ]:
# Synthetic, imbalanced two-class problem: 900 points drawn around the origin
# (class 0) and 100 points drawn around (0.75, 0.75) (class 1).
#
# The covariances are explicit isotropic matrices. The earlier values
# ([[0, 0.5], [0.5, 0]] and [[0, 0.125], [0.125, 0]]) are not positive
# semi-definite, so NumPy emitted a RuntimeWarning for them; its SVD-based
# sampler then effectively drew with 0.5 * I and 0.125 * I, which is what is
# spelled out here.
_x1 = np.random.multivariate_normal(mean=[0, 0], cov=0.5 * np.eye(2), size=(900,))
_x2 = np.random.multivariate_normal(mean=[0.75, 0.75], cov=0.125 * np.eye(2), size=(100,))
X = np.r_[_x1, _x2]

# Everything stacked after the first cloud belongs to class 1.
y = np.zeros((X.shape[0],))
y[_x1.shape[0]:] = 1

# Shuffle samples and labels with one shared permutation; its length follows
# the data rather than a hard-coded sample count.
rand_ix = np.random.permutation(X.shape[0])
X = X[rand_ix, :]
y = y[rand_ix]
In [ ]:
# enter code here
In [ ]:
from sklearn import model_selection

# The legacy `StratifiedKFold(y, n_folds=...)` constructor was removed in
# scikit-learn 0.20. The model_selection splitter is configured with
# `n_splits` and receives the labels through `.split(X, y)`. It is referenced
# through the module so the notebook-level `StratifiedKFold` name is left
# untouched.
kfold = list(model_selection.StratifiedKFold(n_splits=6, shuffle=True).split(X, y))
for train_index, test_index in kfold:
    xtrain = X[train_index, :]
    ytrain = y[train_index]
    xtest = X[test_index, :]
    ytest = y[test_index]
    clf.fit(xtrain, ytrain)
    print(clf.score(xtest, ytest))
In [ ]:
from sklearn import model_selection
from sklearn.model_selection import cross_val_score

# `sklearn.cross_validation` was removed in scikit-learn 0.20; both the scorer
# and the splitter come from `sklearn.model_selection`. The splitter takes
# `n_splits` only; the labels reach it through cross_val_score's `y` argument.
cross_val_score(clf, X, y, cv=model_selection.StratifiedKFold(n_splits=6))
In [ ]:
# Switch the working data back to the digits features and labels.
X, y = digits.data, digits.target
In [ ]:
from sklearn import model_selection

# Stratified 6-fold cross-validation of a linear SVM. The splitter and the
# scorer are both taken from `sklearn.model_selection` (the legacy
# `StratifiedKFold(y, n_folds=...)` signature was removed in scikit-learn
# 0.20), so this cell does not depend on which `cross_val_score` was imported
# earlier.
clf = SVC(kernel="linear")
model_selection.cross_val_score(clf, X, y, cv=model_selection.StratifiedKFold(n_splits=6))
In [ ]:
# Sweep C over ten log-spaced values between 1e-10 and 1, recording the mean
# cross-validated score of a linear SVM for each value.
Cs = np.logspace(-10, 0, 10)
accuracies = []
for C in Cs:
    clf = SVC(C=C, kernel="linear")
    acc = cross_val_score(clf, X, y)
    accuracies.append(acc.mean())

# Mean score against C on a logarithmic x axis.
plt.semilogx(Cs, accuracies)
# Raw string: "\l" is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning on recent Python versions. The label text
# itself is unchanged.
# NOTE(review): the values on this axis are the SVC `C` values; the lambda
# label is kept as it was.
plt.xlabel(r"$\lambda$")
plt.ylabel("Mean score")
In [ ]:
# Load the breast-cancer dataset and make it the working X / y.
bc = load_breast_cancer()
X, y = bc.data, bc.target
In [ ]:
# enter code here
In [ ]:
# `sklearn.grid_search` was removed in scikit-learn 0.20; GridSearchCV is
# provided by `sklearn.model_selection`.
from sklearn.model_selection import GridSearchCV
In [ ]:
# Parameter grid for the search: only C varies, over the values in Cs.
grid = dict(C=Cs)
In [ ]:
# Grid search over `grid` for the current `clf`; n_jobs=-1 asks for all
# available cores.
gcv = GridSearchCV(
    estimator=clf,
    param_grid=grid,
    n_jobs=-1,
)
In [ ]:
# Run the grid search on the currently loaded X and y.
gcv.fit(X, y)
In [ ]:
# Display the estimator selected by the fitted search.
gcv.best_estimator_
In [ ]:
# Display the parameter combination selected by the fitted search.
gcv.best_params_
In [ ]:
# Display the score recorded for the selected parameter combination.
gcv.best_score_
In [ ]:
# Back to the digits data, with a grid that varies the SVM kernel as well as C.
X = digits.data
y = digits.target
# Each kernel is listed once: a repeated entry only makes a grid search
# evaluate the same candidates a second time.
# NOTE(review): the list used to be ['linear', 'poly', 'linear']; if a third
# kernel (e.g. 'rbf') was intended instead of the repeat, add it here.
grid = {"C": Cs, "kernel": ['linear', 'poly']}
In [ ]:
# enter code here
In [ ]:
# Per-candidate results of the fitted search. `grid_scores_` was removed from
# GridSearchCV in scikit-learn 0.20 in favour of `cv_results_`; the legacy
# attribute is used only when the fitted object does not offer the newer one.
gcv.cv_results_ if hasattr(gcv, "cv_results_") else gcv.grid_scores_