In [2]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
In [3]:
from sklearn.datasets import make_moons, make_circles
# make classification problem; fix the seed so the dataset (and every
# downstream figure) is reproducible under Restart & Run All
X, y = make_moons(n_samples=500, noise=0.3, random_state=42)
# represent the coordinates in several convenient ways
I, J = X.T     # I = first coordinate of every point, J = second
IJ = X[:, :2]  # (n, 2) coordinate pairs (X already has exactly 2 columns)
plt.gca().set_aspect('equal')
plt.scatter(I, J, c=y, cmap='cool')
Out[3]:
In [4]:
# definition of cdist from previous notebooks
def cdist(I, J):
    """Pairwise Euclidean distance matrix between two point sets.

    Parameters
    ----------
    I : ndarray, shape (n, d)
        First set of points, one per row.
    J : ndarray, shape (m, d)
        Second set of points, one per row.

    Returns
    -------
    ndarray, shape (n, m)
        Entry (a, b) is the Euclidean distance ||I[a] - J[b]||.
    """
    # Broadcast to an (n, m, d) difference array directly instead of
    # materializing two (n, m) integer index grids with np.mgrid and
    # copying rows via fancy indexing — same result, less allocation.
    diff = I[:, None, :] - J[None, :, :]
    return np.sqrt(np.sum(diff ** 2, axis=2))
In [5]:
# "brute force" definition of knn from previous notebook
def knn(query_pts, pts, k=3):
    """Brute-force k-nearest-neighbor search.

    Parameters
    ----------
    query_pts : ndarray, shape (q, d)
        Points to find neighbors for.
    pts : ndarray, shape (n, d)
        Reference points searched for neighbors (requires k <= n).
    k : int, optional
        Number of nearest neighbors to return per query point.

    Returns
    -------
    knn_dist : ndarray, shape (q, k)
        Distances to the k nearest points, ascending per row.
    knn_ix : ndarray, shape (q, k)
        Indices into `pts` of those nearest points.
    """
    D = cdist(query_pts, pts)
    # column indices of the k smallest distances in each row, ascending
    knn_ix = np.argsort(D, axis=1)[:, :k]
    # gather the matching distances; take_along_axis avoids building an
    # explicit row-index grid with np.mgrid
    knn_dist = np.take_along_axis(D, knn_ix, axis=1)
    return knn_dist, knn_ix
In [13]:
from scipy import stats
# Lay a regular grid over the data's bounding box; every grid point will
# receive a predicted class from its nearest training neighbors.
resolution = 100
imin, imax = I.min(), I.max()
jmin, jmax = J.min(), J.max()
M, N = np.meshgrid(np.linspace(imin, imax, resolution),
                   np.linspace(jmin, jmax, resolution))
# flatten the grid into an (resolution**2, 2) array of query points
MN = np.column_stack((M.ravel(), N.ravel()))
# k nearest training points for every grid point
dist, nabe = knn(MN, IJ, k=7)  # odd k so the majority vote cannot tie
# look up the class label of each neighbor
k_classes = y[nabe]
k_classes.shape
Out[13]:
In [14]:
# majority vote: the most common label among each point's k neighbors
vote = stats.mode(k_classes, axis=1)
pred = vote[0]
# arrange the per-grid-point predictions as a (resolution, resolution) image
pred_img = pred.reshape((resolution, resolution))
plt.gca().set_aspect('equal')
plt.scatter(I, J, c=y, cmap='cool')
plt.imshow(pred_img, origin='lower', cmap='cool',
           extent=[imin, imax, jmin, jmax])
Out[14]: