In [1]:
import numpy as np
from sklearn.base import clone
from sklearn.datasets import load_iris, get_data_home
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import LargeMarginNearestNeighbor
from sklearn.pipeline import Pipeline

In [2]:
# Load the iris data and hold out a stratified 30% test split.
# The fixed random_state makes the split repeatable across runs.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [3]:
# Baseline: fit a 3-nearest-neighbour classifier on the untransformed
# training features and report its accuracy on the held-out split.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
test_acc = knn.score(X_test, y_test)
print(
    'KNN accuracy on iris test set is {:5.2f}%.'.format(100 * test_acc)
)


KNN accuracy on iris test set is 95.56%.

In [4]:
# Build the LMNN classifier as a two-step Pipeline: the LMNN transformer
# ('lin_transform') feeds a 3-nearest-neighbour classifier ('clf').
lmnn = LargeMarginNearestNeighbor(n_neighbors=3, random_state=42)
lmnn_clf = Pipeline(
    steps=[
        ('lin_transform', lmnn),
        ('clf', KNeighborsClassifier(n_neighbors=3)),
    ]
)

In [5]:
# Fit the LMNN pipeline on the training split, then report its accuracy
# on the held-out split (Pipeline.fit returns the pipeline itself).
lmnn_acc = lmnn_clf.fit(X_train, y_train).score(X_test, y_test)
print(
    'LMNN accuracy on iris test set is {:5.2f}%.'.format(100 * lmnn_acc)
)


LMNN accuracy on iris test set is 97.78%.

In [6]:
# Sanity check: the fitted pipeline must produce the same class
# probabilities as running its two stages by hand (LMNN transform,
# then a 3-nearest-neighbour classifier on the transformed features).
pipe_probs = lmnn_clf.predict_proba(X_test)

# `lmnn` is the very object stored as the pipeline's 'lin_transform' step
# (Pipeline does not clone its steps), so refitting it here would overwrite
# the pipeline's fitted state. Fit an unfitted copy with the same
# hyper-parameters instead, which keeps the comparison independent.
seq_lmnn = clone(lmnn)
seq_lmnn.fit(X_train, y_train)

# Use a dedicated name so the baseline `knn` fitted on the raw features
# is not silently replaced when this cell runs.
seq_knn = KNeighborsClassifier(n_neighbors=3)
seq_knn.fit(seq_lmnn.transform(X_train), y_train)
seq_probs = seq_knn.predict_proba(seq_lmnn.transform(X_test))

print(np.array_equal(pipe_probs, seq_probs))


True

In [ ]: