In [ ]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [ ]:
from sklearn.datasets import load_digits

# ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and removed
# in 0.20; ``train_test_split`` now lives in ``sklearn.model_selection``.
# Fall back to the old location so the notebook still runs on old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

digits = load_digits()
# A fixed random_state makes the train/test split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=0)
In [ ]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Manual workflow: the scaler is fitted on the training split only, and the
# SVM is then trained on the scaled training data.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

svm = SVC()
# Trailing semicolon keeps the notebook from echoing the fitted estimator.
svm.fit(X_train_scaled, y_train);
In [ ]:
# Re-use the already fitted scaler on the test split (transform only, no
# refitting), then score the SVM on the scaled test data.
X_test_scaled = scaler.transform(X_test)
svm.score(X_test_scaled, y_test)
In [ ]:
from sklearn.pipeline import Pipeline

# Verbose constructor: every step is an explicit (name, estimator) pair, so
# the step names are chosen by hand.
pipe = Pipeline(steps=[
    ("my_scaler", StandardScaler()),
    ("my_svm", SVC()),
])
# Trailing semicolon keeps the notebook from echoing the fitted pipeline.
pipe.fit(X_train, y_train);
In [ ]:
# Score the fitted pipeline on the held-out split; the raw (unscaled) X_test
# is passed in and the pipeline handles the preprocessing steps itself.
pipe.score(X_test, y_test)
In [ ]:
# Inspect the shape of the decision-function output for the training samples.
pipe.decision_function(X_train).shape
In [ ]:
# NOTE(review): the pipeline's final step is SVC() built with default
# arguments. scikit-learn documents SVC.predict_proba as available only when
# the estimator is created with probability=True, so this call presumably
# raises -- confirm whether that is the intended demonstration here.
pipe.predict_proba(X_train)
In [ ]:
from sklearn.pipeline import make_pipeline

# Shortcut: make_pipeline takes just the estimators, with no hand-written
# step names. fit() returns the pipeline itself, so it can be chained.
pipe = make_pipeline(StandardScaler(), SVC()).fit(X_train, y_train)
pipe.score(X_test, y_test)
In [ ]:
from sklearn.feature_selection import SelectFdr, VarianceThreshold

# A longer chain: variance filter -> scaling -> FDR-based feature selection
# -> SVM, all with default parameters, fitted on the raw training split.
pipe = make_pipeline(
    VarianceThreshold(),
    StandardScaler(),
    SelectFdr(),
    SVC(),
).fit(X_train, y_train)
pipe.score(X_test, y_test)
In [ ]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Unsupervised pipeline: reduce to 10 PCA components, then cluster into 10
# groups. No labels are passed to fit().
cluster_pipe = make_pipeline(
    PCA(n_components=10),
    KMeans(n_clusters=10),
).fit(X_train)
# Cluster assignment for every training sample.
cluster_pipe.predict(X_train)
In [ ]:
# Steps of a make_pipeline-built pipeline are looked up by name; the PCA step
# is retrieved here under the lowercased class name 'pca'.
cluster_pipe.named_steps['pca']
In [ ]:
# Shape of the fitted PCA components; scikit-learn documents this attribute as
# (n_components, n_features), so the first dimension should be 10 here.
cluster_pipe.named_steps['pca'].components_.shape
In [ ]:
# The KMeans step is retrieved under the lowercased class name 'kmeans'.
cluster_pipe.named_steps['kmeans']
In [ ]:
# Shape of the fitted cluster centers. KMeans runs on the PCA output inside
# the pipeline, so this is presumably (10, 10): 10 clusters x 10 components.
cluster_pipe.named_steps['kmeans'].cluster_centers_.shape
In [ ]:
# Clustering pipeline built with the explicit constructor, so the steps get
# hand-chosen names instead of generated ones.
cluster_pipe = Pipeline(steps=[
    ("my_pca", PCA(n_components=10)),
    ("my_clustering", KMeans(n_clusters=10)),
])
cluster_pipe.fit(X_train)
In [ ]:
# With hand-named steps, named_steps is indexed by the names given to the
# Pipeline constructor ('my_pca', 'my_clustering').
print(cluster_pipe.named_steps['my_pca'])
print(cluster_pipe.named_steps['my_clustering'])
In [ ]:
In [ ]: