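In this notebook, I will use a pipeline to preprocess the data and construct a model, and then evaluate the model with k-fold cross-validation. The pipeline performs the following steps:

1. Standardize the features with `StandardScaler`.
2. Train a support vector classifier (`SVC`) on the scaled features.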
In [2]:
from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing
from sklearn import model_selection
from sklearn import svm
In [3]:
# Fetch the iris dataset, then pull out its `data` and `target` arrays as X and y
iris = load_iris()
X, y = iris.data, iris.target
In [4]:
# Build a two-step pipeline: a StandardScaler followed by a support vector classifier (C=1)
classifier_pipeline = make_pipeline(
    preprocessing.StandardScaler(),
    svm.SVC(C=1),
)
In [5]:
# Score the whole pipeline on (X, y) with 3-fold cross-validation (cv=3)
scores = model_selection.cross_val_score(
    estimator=classifier_pipeline, X=X, y=y, cv=3
)
In [6]:
# Show the scores returned by cross_val_score
scores
Out[6]:
In [7]:
# Average the cross-validation scores into a single summary value
scores.mean()
Out[7]: