In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Apply matplotlib's 'ggplot' style sheet to the figures drawn in later cells.
plt.style.use('ggplot')
In [3]:
# Seed NumPy's global random generator so the random data drawn below is
# repeatable when the notebook is re-run from a fresh kernel.
np.random.seed(42)
In [4]:
# One example data point: 2 random integers drawn from [0, 100)
# (randint's upper bound is exclusive).
single_data_point = np.random.randint(0, 100, 2)
single_data_point
Out[4]:
In [5]:
# One example class label: randint(0, 2) yields either 0 or 1.
single_label = np.random.randint(0, 2)
single_label
Out[5]:
In [6]:
def generate_data(num_samples, num_features=2, low=0, high=100, num_classes=2):
    '''Randomly generate integer-valued data points with random class labels.

    Values come from NumPy's global random generator (``np.random``), so
    seed it beforehand for repeatable results. All feature values are drawn
    first, then all labels.

    Parameters
    ----------
    num_samples : int
        Number of data points (rows) to generate.
    num_features : int, optional
        Number of features (columns) per data point. Defaults to 2.
    low, high : int, optional
        Every feature value is a random integer in ``[low, high)``.
        Defaults to ``[0, 100)``.
    num_classes : int, optional
        Every label is a random integer in ``[0, num_classes)``.
        Defaults to 2, i.e. labels are 0 or 1.

    Returns
    -------
    data : np.ndarray
        Array of shape ``(num_samples, num_features)`` and dtype float32.
    labels : np.ndarray
        Integer array of shape ``(num_samples, 1)``.
    '''
    data_size = (num_samples, num_features)
    data = np.random.randint(low, high, size=data_size)
    labels_size = (num_samples, 1)
    labels = np.random.randint(0, num_classes, size=labels_size)
    # OpenCV can be a bit finicky when it comes to datatypes:
    # make sure to always convert your data points to np.float32.
    return data.astype(np.float32), labels
In [7]:
# Build the training set: 11 random points (2 features each by default)
# with one label per point, then display the feature array.
train_data, labels = generate_data(11)
train_data
Out[7]:
In [8]:
# First training sample: its feature row and its label.
train_data[0], labels[0]
Out[8]:
In [9]:
# Draw the first training point as a blue square ('sb' = square marker, blue).
plt.plot(train_data[0,0], train_data[0,1], 'sb')
plt.xlabel('x coordinate')
plt.ylabel('y coordinate')
Out[9]:
In [10]:
def plot_data(all_blue, all_red):
    '''Scatter-plot two groups of points with pyplot and label both axes.

    ``all_blue`` is drawn as blue squares and ``all_red`` as red triangles.
    Each argument is indexed as a 2-D array: column 0 supplies the
    x coordinates and column 1 the y coordinates.
    '''
    groups = ((all_blue, 'b', 's'), (all_red, 'r', '^'))
    for points, color, marker in groups:
        plt.scatter(points[:, 0], points[:, 1], c=color, marker=marker, s=180)
    plt.xlabel('x coordinate (feature 1)')
    plt.ylabel('y coordinate (feature 2)')
In [11]:
# Boolean mask over the flattened labels: True where the label is 0.
labels.ravel() == 0
Out[11]:
In [12]:
# Rows of train_data whose label is 0.
blue = train_data[labels.ravel() == 0]
In [13]:
# Rows of train_data whose label is 1.
red = train_data[labels.ravel() == 1]
In [14]:
# Scatter-plot both classes of the training set.
plot_data(blue, red)
In [15]:
# Create an empty (not yet trained) OpenCV k-nearest-neighbours model.
knn = cv2.ml.KNearest_create()
In [16]:
# Train on the labelled points; cv2.ml.ROW_SAMPLE tells OpenCV that each
# row of train_data is one sample.
knn.train(train_data, cv2.ml.ROW_SAMPLE, labels)
Out[16]:
In [17]:
# Generate one new random point to classify; its random label is discarded.
newcomer, _ = generate_data(1)
In [18]:
# Re-draw the training set and overlay the newcomer as a green circle ('go').
# The trailing ';' suppresses the text repr of the plot call in the output.
plot_data(blue, red)
plt.plot(newcomer[0,0], newcomer[0,1], 'go',\
markersize=14);
In [19]:
# Classify the newcomer using only its single nearest neighbour (k=1),
# then print the prediction, that neighbour's label and its distance.
ret, results, neighbor, dist = knn.findNearest(newcomer,1)
print('predicted label:\t', results)
print("Neighbor's label:\t", neighbor)
print('Distance to neighbor:\t', dist)
In [20]:
# Repeat the query with k=7 so the prediction is based on the 7 nearest
# neighbours; print the prediction plus each neighbour's label and distance.
ret, results, neighbors, dist = knn.findNearest(newcomer,7)
print("Predicted label:\t", results)
print("Neighbors' labels:\t", neighbors)
print("Distance to neighbors:\t", dist)
In [ ]: