In [21]:
from sklearn.datasets import make_regression,make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
In [22]:
# One seed drives both data generation and the split, so that
# Restart Kernel -> Run All reproduces the same dataset and the same score.
SEED = 42

# Synthetic classification dataset: 100 samples, 10 features, 2 of them informative.
X, y = make_classification(
    n_samples=100,
    n_features=10,
    n_informative=2,
    random_state=SEED,
)

# Hold out 33% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=SEED
)
In [23]:
# Sanity check: shapes of the train/test feature matrices and label vectors.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))
Out[23]:
In [24]:
# A Pipeline is built from a list of (name, estimator) tuples; the steps
# are listed in the order they are applied: scaling first, classifier last.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression()),
    ]
)

# The pipeline object is used exactly like a regular classifier:
# fit it on the training features and labels.
pipeline.fit(X_train, y_train)
Out[24]:
In [25]:
# Predict labels for the held-out test features using the pipeline.
y_preds = pipeline.predict(X_test)
In [26]:
# Accuracy of the pipeline's predictions against the true test labels.
accuracy_score(y_true=y_test, y_pred=y_preds)
Out[26]: