In [1]:
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
In [2]:
# Load the iris dataset, then pull out the feature matrix and the class labels.
iris = load_iris()
X = iris.data
y = iris.target
In [3]:
# This dataset is way too high-dimensional. Better do PCA
# (keeping 2 components):
pca = PCA(n_components=2)
In [4]:
# Maybe some original features were good, too?
# Univariate selection keeping only the single best-scoring feature (k=1):
selection = SelectKBest(k=1)
In [5]:
# Combine both feature extractors into a single transformer: the outputs of
# the PCA step and of the univariate selection step are used side by side.
combined_features = FeatureUnion(
    transformer_list=[
        ("pca", pca),
        ("univ_select", selection),
    ]
)
In [6]:
# Fit the combined feature extractor on the data (the labels are passed along
# to fit), then transform the dataset into the new feature space.
combined_features.fit(X, y)
X_features = combined_features.transform(X)
In [7]:
# Train a linear-kernel SVM on the combined features. `fit` returns the
# estimator itself, so `svm` is the fitted classifier; the trailing bare
# expression displays it as the cell output.
svm = SVC(kernel="linear").fit(X_features, y)
svm
Out[7]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
kernel='linear', max_iter=-1, probability=False, random_state=None,
shrinking=True, tol=0.001, verbose=False)
In [10]:
# Chain feature extraction and classification into one estimator so that the
# parameters of every step can be tuned together.
pipeline = Pipeline(steps=[("features", combined_features), ("svm", svm)])

# Search space: number of PCA components, number of selected features (k)
# and the SVM regularisation parameter C -- 3 * 2 * 3 = 18 combinations.
# Nested parameters are addressed as <step>__<sub-step>__<parameter>.
param_grid = {
    "features__pca__n_components": [1, 2, 3],
    "features__univ_select__k": [1, 2],
    "svm__C": [0.1, 1, 10],
}

# verbose=10 makes the search log every individual cross-validation fit.
grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1, score=0.960784 - 0.0s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1, score=0.901961 - 0.0s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1, score=0.941176 - 0.0s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1, score=0.921569 - 0.0s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1, score=0.960784 - 0.0s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1, score=0.921569 - 0.0s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2, score=0.960784 - 0.0s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2, score=0.921569 - 0.0s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2, score=0.979167 - 0.0s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2, score=0.960784 - 0.0s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2, score=0.921569 - 0.0s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2, score=1.000000 - 0.0s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2, score=0.980392 - 0.0s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2, score=0.901961 - 0.0s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2, score=1.000000 - 0.0s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1, score=0.960784 - 0.0s
[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 13 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 18 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 25 jobs | elapsed: 0.0s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1, score=0.901961 - 0.0s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1, score=0.980392 - 0.0s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1, score=0.941176 - 0.0s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1, score=0.980392 - 0.0s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1, score=0.941176 - 0.0s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2, score=0.980392 - 0.0s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2, score=0.941176 - 0.0s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2, score=0.979167 - 0.0s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2, score=1.000000 - 0.0s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2, score=0.960784 - 0.0s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2, score=0.979167 - 0.0s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2, score=0.980392 - 0.0s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2, score=0.921569 - 0.0s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2, score=1.000000 - 0.0s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1, score=0.980392 - 0.0s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1, score=0.941176 - 0.0s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1, score=0.979167 - 0.0s
[Parallel(n_jobs=1)]: Done 32 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 41 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 50 jobs | elapsed: 0.0s
[Parallel(n_jobs=1)]: Done 54 out of 54 | elapsed: 0.0s finished
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1, score=1.000000 - 0.0s
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1, score=0.941176 - 0.0s
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1, score=0.979167 - 0.0s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1, score=1.000000 - 0.0s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1, score=0.921569 - 0.0s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1, score=1.000000 - 0.0s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2, score=0.980392 - 0.0s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2, score=0.941176 - 0.0s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2, score=0.979167 - 0.0s
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=2, score=1.000000 - 0.0s
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=2, score=0.960784 - 0.0s
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=2, score=0.979167 - 0.0s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=2, score=1.000000 - 0.0s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=2, score=0.921569 - 0.0s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=2
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=2, score=1.000000 - 0.0s
Out[10]:
GridSearchCV(cv=None,
estimator=Pipeline(steps=[('features', FeatureUnion(n_jobs=1,
transformer_list=[('pca', PCA(copy=True, n_components=2, whiten=False)), ('univ_select', SelectKBest(k=1, score_func=<function f_classif at 0x0000000012D4CB38>))],
transformer_weights=None)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
kernel='linear', max_iter=-1, probability=False, random_state=None,
shrinking=True, tol=0.001, verbose=False))]),
fit_params={}, iid=True, loss_func=None, n_jobs=1,
param_grid={'features__pca__n_components': [1, 2, 3], 'svm__C': [0.1, 1, 10], 'features__univ_select__k': [1, 2]},
pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
verbose=10)
In [11]:
# Show the pipeline configuration that the grid search selected as best.
print(grid_search.best_estimator_)
Pipeline(steps=[('features', FeatureUnion(n_jobs=1,
transformer_list=[('pca', PCA(copy=True, n_components=2, whiten=False)), ('univ_select', SelectKBest(k=2, score_func=<function f_classif at 0x0000000012D4CB38>))],
transformer_weights=None)), ('svm', SVC(C=1, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
kernel='linear', max_iter=-1, probability=False, random_state=None,
shrinking=True, tol=0.001, verbose=False))])
In [11]:
In [ ]:
Content source: zhongyuanzhou/FCH808.github.io
Similar notebooks: