In [1]:
import graphlab

Load CIFAR-10 dataset


In [48]:
# Load the training images from the local 'image_train_data/' SFrame directory.
image_train = graphlab.SFrame('image_train_data/')

In [49]:
# Preview the first rows: id, image, label, deep_features and image_array.
image_train.head()


Out[49]:
id image label deep_features image_array
24 Height: 32 Width: 32 bird [0.242871761322,
1.09545373917, 0.0, ...
[73.0, 77.0, 58.0, 71.0,
68.0, 50.0, 77.0, 69.0, ...
33 Height: 32 Width: 32 cat [0.525087952614, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[7.0, 5.0, 8.0, 7.0, 5.0,
8.0, 5.0, 4.0, 6.0, 7.0, ...
36 Height: 32 Width: 32 cat [0.566015958786, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[169.0, 122.0, 65.0,
131.0, 108.0, 75.0, ...
70 Height: 32 Width: 32 dog [1.12979578972, 0.0, 0.0,
0.778194487095, 0.0, ...
[154.0, 179.0, 152.0,
159.0, 183.0, 157.0, ...
90 Height: 32 Width: 32 bird [1.71786928177, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[216.0, 195.0, 180.0,
201.0, 178.0, 160.0, ...
97 Height: 32 Width: 32 automobile [1.57818555832, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[33.0, 44.0, 27.0, 29.0,
44.0, 31.0, 32.0, 45.0, ...
107 Height: 32 Width: 32 dog [0.0, 0.0,
0.220677852631, 0.0, ...
[97.0, 51.0, 31.0, 104.0,
58.0, 38.0, 107.0, 61.0, ...
121 Height: 32 Width: 32 bird [0.0, 0.23753464222, 0.0,
0.0, 0.0, 0.0, ...
[93.0, 96.0, 88.0, 102.0,
106.0, 97.0, 117.0, ...
136 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 7.5737862587, 0.0, ...
[35.0, 59.0, 53.0, 36.0,
56.0, 56.0, 42.0, 62.0, ...
138 Height: 32 Width: 32 bird [0.658935725689, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[205.0, 193.0, 195.0,
200.0, 187.0, 193.0, ...
[10 rows x 5 columns]


In [50]:
# Load the test images from the local 'image_test_data/' SFrame directory.
image_test = graphlab.SFrame('image_test_data/')

In [57]:
# Inspect the test row at position 9; slicing keeps it as a one-row SFrame.
image_test[9:10]


Out[57]:
id image label deep_features image_array
33 Height: 32 Width: 32 dog [0.130786716938,
0.727667212486, 0.0, ...
[118.0, 113.0, 81.0,
122.0, 117.0, 83.0, ...
[1 rows x 5 columns]

Train a nearest neighbors model for retrieving images using deep features


In [4]:
# Nearest-neighbours model over all training images, comparing only the
# 'deep_features' column; neighbours are reported by their 'id' value.
knn_model = graphlab.nearest_neighbors.create(image_train, features=['deep_features'], label='id')


PROGRESS: Starting brute force nearest neighbors model training.

Use the image retrieval model with deep features to find similar images


In [5]:
# Point GraphLab Canvas at the 'ipynb' target -- presumably so that .show()
# renders inside the notebook; confirm against the GraphLab Canvas docs.
graphlab.canvas.set_target('ipynb')
# One-row query: the training image at position 18 (a slice, so still an SFrame).
cat = image_train[18:19]
cat['image'].show()



In [6]:
# Query with the default settings. The recorded output lists 5 neighbours;
# the closest one (distance 0.0) is the query image itself, since it comes
# from the training set the model was built on.
knn_model.query(cat)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 24.407ms     |
PROGRESS: | Done         |         | 100         | 211.555ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[6]:
query_label reference_label distance rank
0 384 0.0 1
0 6910 36.9403137951 2
0 39777 38.4634888975 3
0 36870 39.7559623119 4
0 41734 39.7866014148 5
[5 rows x 4 columns]


In [7]:
def get_images_from_ids(query_result):
    """Look up the training images referenced by a nearest-neighbour query.

    query_result -- object whose 'reference_label' entry holds the ids of the
                    neighbours (e.g. what a model's ``query`` call returns).

    Returns the rows of the global ``image_train`` whose 'id' value appears
    among those reference labels.
    """
    neighbor_ids = query_result['reference_label']
    return image_train.filter_by(neighbor_ids, 'id')

In [8]:
# Fetch the full training rows for the neighbours of the query image.
cat_neighbors = get_images_from_ids(knn_model.query(cat))


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 18.686ms     |
PROGRESS: | Done         |         | 100         | 207.926ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [9]:
# Display the neighbour images for a visual check.
cat_neighbors['image'].show()



In [10]:
# One-row query: the training image at position 8 (id 136, labelled
# 'automobile' in the head() preview above).
car = image_train[8:9]

In [11]:
# Display the query image.
car['image'].show()



In [12]:
# Query the model with the car image and display the neighbours it returns.
get_images_from_ids(knn_model.query(car))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 22.241ms     |
PROGRESS: | Done         |         | 100         | 199.811ms    |
PROGRESS: +--------------+---------+-------------+--------------+

Just for fun, create a lambda to find and show nearest-neighbor images


In [13]:
# show_neighbors(i): query knn_model with the training image at position i
# (as a one-row slice) and display the images of the neighbours it returns.
show_neighbors = lambda i: get_images_from_ids(knn_model.query(image_train[i:i+1]))['image'].show()

In [14]:
# Position 8 is the same automobile image used as `car` above.
show_neighbors(8)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 23.666ms     |
PROGRESS: | Done         |         | 100         | 203.455ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [15]:
# Neighbours of the training image at position 26.
show_neighbors(26)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 15.574ms     |
PROGRESS: | Done         |         | 100         | 230.703ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [16]:
# Neighbours of the training image at position 1222.
show_neighbors(1222)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 21.168ms     |
PROGRESS: | Done         |         | 100         | 211.313ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [17]:
# Neighbours of the training image at position 2000.
show_neighbors(2000)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 19.899ms     |
PROGRESS: | Done         |         | 100         | 205.228ms    |
PROGRESS: +--------------+---------+-------------+--------------+

Problem 1


In [18]:
# Summarise the label column: length, missing values and per-class counts.
image_train['label'].sketch_summary()


Out[18]:
+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+

In [47]:
# Keep only the training images labelled 'cat'.
image_train_cat = image_train[image_train['label'] == 'cat']
# A bare `len(...)` in the middle of a cell is evaluated and discarded (only
# the last expression of a cell is displayed), so print the count explicitly.
print(len(image_train_cat))
image_train_cat.head()


Out[47]:
id image label deep_features image_array
33 Height: 32 Width: 32 cat [0.525087952614, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[7.0, 5.0, 8.0, 7.0, 5.0,
8.0, 5.0, 4.0, 6.0, 7.0, ...
36 Height: 32 Width: 32 cat [0.566015958786, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[169.0, 122.0, 65.0,
131.0, 108.0, 75.0, ...
159 Height: 32 Width: 32 cat [0.0, 0.0, 0.0,
0.64327532053, 0.0, 0.0, ...
[154.0, 145.0, 135.0,
152.0, 144.0, 135.0, ...
331 Height: 32 Width: 32 cat [0.0, 0.0,
0.510963916779, 0.0, ...
[45.0, 65.0, 92.0, 72.0,
95.0, 110.0, 106.0, ...
367 Height: 32 Width: 32 cat [1.38658058643, 0.0, 0.0,
0.0, 0.0, 0.182891070 ...
[168.0, 151.0, 143.0,
145.0, 130.0, 124.0, ...
384 Height: 32 Width: 32 cat [1.04403531551, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[46.0, 45.0, 50.0, 47.0,
45.0, 51.0, 45.0, 44.0, ...
494 Height: 32 Width: 32 cat [0.0, 0.0539512038231,
1.95745122433, 0.0, 0.0, ...
[26.0, 34.0, 29.0, 24.0,
29.0, 25.0, 33.0, 43.0, ...
597 Height: 32 Width: 32 cat [0.0, 0.0,
0.0470637083054, 0.0, ...
[133.0, 153.0, 138.0,
126.0, 146.0, 136.0, ...
788 Height: 32 Width: 32 cat [0.505841910839, 0.0,
0.0, 0.0, 0.427211523 ...
[184.0, 200.0, 197.0,
189.0, 203.0, 200.0, ...
882 Height: 32 Width: 32 cat [0.0, 0.0,
0.156200289726, 0.0, ...
[141.0, 133.0, 112.0,
143.0, 133.0, 113.0, ...
[10 rows x 5 columns]


In [20]:
# Keep only the training images labelled 'dog'.
image_train_dog = image_train[image_train['label'] == 'dog']
# A bare `len(...)` in the middle of a cell is evaluated and discarded (only
# the last expression of a cell is displayed), so print the count explicitly.
print(len(image_train_dog))
image_train_dog.head()


Out[20]:
id image label deep_features image_array
70 Height: 32 Width: 32 dog [1.12979578972, 0.0, 0.0,
0.778194487095, 0.0, ...
[154.0, 179.0, 152.0,
159.0, 183.0, 157.0, ...
107 Height: 32 Width: 32 dog [0.0, 0.0,
0.220677852631, 0.0, ...
[97.0, 51.0, 31.0, 104.0,
58.0, 38.0, 107.0, 61.0, ...
177 Height: 32 Width: 32 dog [0.0, 1.45965671539, 0.0,
0.422992348671, 0.0, ...
[55.0, 75.0, 42.0, 51.0,
76.0, 37.0, 57.0, 83.0, ...
424 Height: 32 Width: 32 dog [0.942399680614, 0.0,
0.220352768898, 0.0, ...
[60.0, 35.0, 18.0, 63.0,
49.0, 38.0, 66.0, 56.0, ...
462 Height: 32 Width: 32 dog [1.43462562561, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[86.0, 69.0, 75.0, 57.0,
41.0, 48.0, 46.0, 35.0, ...
542 Height: 32 Width: 32 dog [0.451547086239, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[196.0, 174.0, 113.0,
140.0, 117.0, 65.0, 8 ...
573 Height: 32 Width: 32 dog [0.592360973358, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[153.0, 103.0, 52.0,
151.0, 102.0, 49.0, ...
851 Height: 32 Width: 32 dog [0.690123438835, 0.0,
0.0, 0.0, 0.305860161 ...
[39.0, 6.0, 4.0, 53.0,
23.0, 24.0, 57.0, 37.0, ...
919 Height: 32 Width: 32 dog [0.0, 0.177558660507,
0.139396846294, 0.0, ...
[29.0, 43.0, 4.0, 24.0,
35.0, 6.0, 24.0, 37.0, ...
1172 Height: 32 Width: 32 dog [0.517601490021, 0.0,
1.96418333054, 0.0, 0.0, ...
[182.0, 180.0, 197.0,
196.0, 192.0, 209.0, ...
[10 rows x 5 columns]


In [21]:
# Keep only the training images labelled 'automobile'.
image_train_auto = image_train[image_train['label'] == 'automobile']
# A bare `len(...)` in the middle of a cell is evaluated and discarded (only
# the last expression of a cell is displayed), so print the count explicitly.
print(len(image_train_auto))
image_train_auto.head()


Out[21]:
id image label deep_features image_array
97 Height: 32 Width: 32 automobile [1.57818555832, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[33.0, 44.0, 27.0, 29.0,
44.0, 31.0, 32.0, 45.0, ...
136 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 7.5737862587, 0.0, ...
[35.0, 59.0, 53.0, 36.0,
56.0, 56.0, 42.0, 62.0, ...
302 Height: 32 Width: 32 automobile [0.583938002586, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[64.0, 52.0, 37.0, 85.0,
60.0, 40.0, 92.0, 66.0, ...
312 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0,
0.392823398113, 0.0, ...
[124.0, 126.0, 113.0,
124.0, 126.0, 113.0, ...
323 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0,
4.42310428619, ...
[241.0, 241.0, 241.0,
238.0, 238.0, 238.0, ...
536 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.42903900146, 0.0, ...
[164.0, 154.0, 154.0,
128.0, 119.0, 120.0, ...
593 Height: 32 Width: 32 automobile [1.65033948421, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[231.0, 222.0, 227.0,
232.0, 217.0, 221.0, ...
962 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0,
0.39552795887, 0.0, 0.0, ...
[255.0, 255.0, 255.0,
255.0, 255.0, 255.0, ...
997 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.04085636139, 0.0, ...
[145.0, 148.0, 157.0,
131.0, 134.0, 145.0, ...
1421 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.359612941742, ...
[114.0, 95.0, 33.0,
118.0, 98.0, 26.0, 91.0, ...
[10 rows x 5 columns]


In [22]:
# Keep only the training images labelled 'bird'.
image_train_bird = image_train[image_train['label'] == 'bird']
# A bare `len(...)` in the middle of a cell is evaluated and discarded (only
# the last expression of a cell is displayed), so print the count explicitly.
print(len(image_train_bird))
image_train_bird.head()


Out[22]:
id image label deep_features image_array
24 Height: 32 Width: 32 bird [0.242871761322,
1.09545373917, 0.0, ...
[73.0, 77.0, 58.0, 71.0,
68.0, 50.0, 77.0, 69.0, ...
90 Height: 32 Width: 32 bird [1.71786928177, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[216.0, 195.0, 180.0,
201.0, 178.0, 160.0, ...
121 Height: 32 Width: 32 bird [0.0, 0.23753464222, 0.0,
0.0, 0.0, 0.0, ...
[93.0, 96.0, 88.0, 102.0,
106.0, 97.0, 117.0, ...
138 Height: 32 Width: 32 bird [0.658935725689, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[205.0, 193.0, 195.0,
200.0, 187.0, 193.0, ...
335 Height: 32 Width: 32 bird [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.50706672668, 0.0, ...
[160.0, 159.0, 154.0,
162.0, 161.0, 156.0, ...
560 Height: 32 Width: 32 bird [1.69159495831, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[147.0, 138.0, 88.0,
151.0, 142.0, 92.0, ...
649 Height: 32 Width: 32 bird [0.511156201363,
0.324165046215, 0.0, ...
[65.0, 127.0, 9.0, 127.0,
160.0, 15.0, 159.0, ...
775 Height: 32 Width: 32 bird [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 10.0127315521, 0.0, ...
[29.0, 41.0, 25.0, 29.0,
42.0, 25.0, 28.0, 41.0, ...
802 Height: 32 Width: 32 bird [0.277166724205, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[233.0, 230.0, 173.0,
222.0, 218.0, 168.0, ...
975 Height: 32 Width: 32 bird [0.0, 0.0336718559265,
0.0, 0.645326733589, ...
[59.0, 180.0, 110.0,
88.0, 186.0, 117.0, ...
[10 rows x 5 columns]


In [23]:
# Nearest-neighbours model restricted to the 'cat' training images
# (same feature column and id label as the all-classes model).
knn_model_cat = graphlab.nearest_neighbors.create(image_train_cat, features=['deep_features'], label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [58]:
# Nearest-neighbours model restricted to the 'dog' training images.
knn_model_dog = graphlab.nearest_neighbors.create(image_train_dog, features=['deep_features'], label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [25]:
# Nearest-neighbours model restricted to the 'automobile' training images.
knn_model_auto = graphlab.nearest_neighbors.create(image_train_auto, features=['deep_features'], label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [26]:
# Nearest-neighbours model restricted to the 'bird' training images.
knn_model_bird = graphlab.nearest_neighbors.create(image_train_bird, features=['deep_features'], label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [27]:
# NOTE(review): image_test is already loaded from the same path in an earlier
# cell; when the notebook is run top to bottom this reload is redundant.
image_test = graphlab.SFrame('image_test_data/')

In [28]:
# The original cell started with a bare `image_test[0:1]`, whose value was
# discarded because it was not the last expression of the cell. Take the
# one-row slice once, keep it under a name, and display its image.
first_test_image = image_test[0:1]
first_test_image['image'].show()



In [29]:
# Nearest 'cat' training images to the first test image (the recorded output
# lists 5 neighbours with their distances).
knn_model_cat.query(image_test[0:1])


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 23.217ms     |
PROGRESS: | Done         |         | 100         | 98.053ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[29]:
query_label reference_label distance rank
0 16289 34.623719208 1
0 45646 36.0068799284 2
0 32139 36.5200813436 3
0 25713 36.7548502521 4
0 331 36.8731228168 5
[5 rows x 4 columns]


In [44]:
# Show those cat neighbours. The ids are looked up directly in
# image_train_cat here rather than through get_images_from_ids, which
# searches the full image_train.
image_train_cat.filter_by(knn_model_cat.query(image_test[0:1])['reference_label'], 'id')['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 19.231ms     |
PROGRESS: | Done         |         | 100         | 85.107ms     |
PROGRESS: +--------------+---------+-------------+--------------+

In [61]:
# Nearest 'dog' training images to the same first test image.
knn_model_dog.query(image_test[0:1])


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 23.046ms     |
PROGRESS: | Done         |         | 100         | 103.086ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[61]:
query_label reference_label distance rank
0 16976 37.4642628784 1
0 13387 37.5666832169 2
0 35867 37.6047267079 3
0 44603 37.7065585153 4
0 6094 38.5113254907 5
[5 rows x 4 columns]


In [46]:
# Show those dog neighbours; get_images_from_ids resolves the ids against the
# full image_train, of which the dog images are a subset.
get_images_from_ids(knn_model_dog.query(image_test[0:1]))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 23.123ms     |
PROGRESS: | Done         |         | 100         | 99.341ms     |
PROGRESS: +--------------+---------+-------------+--------------+

Problem 3


In [31]:
# Exploratory check: what type is the 'distance' column of the first five
# result rows? (The recorded output shows an SArray.)
type(knn_model_cat.query(image_test[0:1])[0:5]['distance'])


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 20.556ms     |
PROGRESS: | Done         |         | 100         | 78.118ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[31]:
graphlab.data_structures.sarray.SArray

In [60]:
# Mean distance from the first test image to the first five rows of the
# 'cat' model's query result.
knn_model_cat.query(image_test[0:1])[0:5]['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 21.807ms     |
PROGRESS: | Done         |         | 100         | 85.812ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[60]:
36.15573070978294

In [59]:
# Same mean for the 'dog' model, for comparison with the cat value above.
knn_model_dog.query(image_test[0:1])[0:5]['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 14.744ms     |
PROGRESS: | Done         |         | 100         | 86.349ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[59]:
37.77071136184156

Problem 4


In [34]:
# Keep only the test images labelled 'cat' and preview them.
image_test_cat = image_test[image_test['label'] == 'cat']
image_test_cat.head()


Out[34]:
id image label deep_features image_array
0 Height: 32 Width: 32 cat [1.13469004631, 0.0, 0.0,
0.0, 0.0366497635841, ...
[158.0, 112.0, 49.0,
159.0, 111.0, 47.0, ...
8 Height: 32 Width: 32 cat [0.0, 0.0,
0.0344192385674, 0.0, ...
[23.0, 19.0, 23.0, 19.0,
21.0, 28.0, 21.0, 16.0, ...
46 Height: 32 Width: 32 cat [0.0, 0.0,
0.255758941174, 0.0, ...
[22.0, 27.0, 19.0, 30.0,
36.0, 26.0, 30.0, 36.0, ...
53 Height: 32 Width: 32 cat [0.61073166132, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[255.0, 255.0, 251.0,
249.0, 251.0, 246.0, ...
61 Height: 32 Width: 32 cat [0.935939073563, 0.0,
0.778302431107, 0.0, ...
[10.0, 10.0, 10.0, 10.0,
10.0, 10.0, 10.0, 10.0, ...
63 Height: 32 Width: 32 cat [0.0, 0.0,
0.359475255013, 0.0, ...
[233.0, 225.0, 209.0,
201.0, 170.0, 123.0, ...
68 Height: 32 Width: 32 cat [0.708701610565, 0.0,
0.0, 0.0, 0.198133170 ...
[61.0, 44.0, 21.0, 55.0,
41.0, 17.0, 58.0, 48.0, ...
77 Height: 32 Width: 32 cat [0.295638740063, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[51.0, 49.0, 53.0, 57.0,
56.0, 61.0, 61.0, 61.0, ...
78 Height: 32 Width: 32 cat [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.64840698242, 0.0, ...
[38.0, 24.0, 12.0, 50.0,
31.0, 24.0, 37.0, 21.0, ...
91 Height: 32 Width: 32 cat [1.24886679649, 0.0, 0.0,
0.0196405649185, ...
[72.0, 75.0, 57.0, 70.0,
74.0, 52.0, 63.0, 67.0, ...
[10 rows x 5 columns]


In [35]:
# Keep only the test images labelled 'dog' and preview them.
image_test_dog = image_test[image_test['label'] == 'dog']
image_test_dog.head()


Out[35]:
id image label deep_features image_array
12 Height: 32 Width: 32 dog [0.322317481041, 0.0,
1.24933350086, 0.0, 0.0, ...
[91.0, 64.0, 30.0, 82.0,
58.0, 30.0, 87.0, 73.0, ...
16 Height: 32 Width: 32 dog [0.0, 0.0,
0.347357034683, 0.0, ...
[95.0, 76.0, 78.0, 92.0,
77.0, 78.0, 89.0, 77.0, ...
24 Height: 32 Width: 32 dog [1.31557655334, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[136.0, 134.0, 118.0,
142.0, 141.0, 126.0, ...
31 Height: 32 Width: 32 dog [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 9.26018810272, 0.0, ...
[127.0, 130.0, 81.0,
130.0, 133.0, 88.0, ...
33 Height: 32 Width: 32 dog [0.130786716938,
0.727667212486, 0.0, ...
[118.0, 113.0, 81.0,
122.0, 117.0, 83.0, ...
39 Height: 32 Width: 32 dog [0.0112721920013, 0.0,
0.76088231802, 0.0, 0.0, ...
[72.0, 88.0, 97.0, 55.0,
70.0, 84.0, 61.0, 73.0, ...
42 Height: 32 Width: 32 dog [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.33566474915, 0.0, ...
[149.0, 149.0, 150.0,
149.0, 149.0, 146.0, ...
85 Height: 32 Width: 32 dog [0.0, 0.298464655876,
0.0, 0.0, 0.0, 0.0, ...
[113.0, 174.0, 228.0,
113.0, 174.0, 227.0, ...
101 Height: 32 Width: 32 dog [0.591187477112, 0.0,
0.0, 0.0, 0.420105159 ...
[60.0, 77.0, 93.0, 60.0,
76.0, 92.0, 58.0, 74.0, ...
128 Height: 32 Width: 32 dog [0.0, 0.0,
0.0717121362686, ...
[86.0, 18.0, 39.0, 84.0,
18.0, 38.0, 82.0, 16.0, ...
[10 rows x 5 columns]


In [36]:
# Keep only the test images labelled 'automobile' and preview them.
image_test_auto = image_test[image_test['label'] == 'automobile']
image_test_auto.head()


Out[36]:
id image label deep_features image_array
6 Height: 32 Width: 32 automobile [0.23135882616, 0.0, 0.0,
0.0, 0.0, 0.226023137 ...
[160.0, 37.0, 13.0,
185.0, 49.0, 11.0, 20 ...
9 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 11.6065092087, 0.0, ...
[217.0, 215.0, 209.0,
210.0, 208.0, 202.0, ...
37 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.87486553192, 0.0, ...
[255.0, 255.0, 255.0,
255.0, 255.0, 255.0, ...
66 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 8.66312026978, 0.0, ...
[109.0, 65.0, 29.0, 84.0,
65.0, 46.0, 100.0, 10 ...
81 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0,
0.600168287754, 0.0, ...
[2.0, 2.0, 13.0, 0.0,
0.0, 12.0, 0.0, 0.0, ...
82 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0,
0.976673960686, ...
[45.0, 70.0, 27.0, 49.0,
76.0, 31.0, 62.0, 81.0, ...
104 Height: 32 Width: 32 automobile [0.682431340218,
0.567831158638, 0.0, ...
[245.0, 250.0, 244.0,
245.0, 249.0, 244.0, ...
105 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0,
3.45144057274, ...
[255.0, 255.0, 255.0,
252.0, 252.0, 252.0, ...
114 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 7.49539375305, 0.0, ...
[199.0, 209.0, 210.0,
195.0, 205.0, 206.0, ...
122 Height: 32 Width: 32 automobile [0.0, 0.564986824989,
0.0, 1.67841720581, 0.0, ...
[51.0, 71.0, 34.0, 47.0,
66.0, 33.0, 40.0, 66.0, ...
[10 rows x 5 columns]


In [37]:
# Keep only the test images labelled 'bird' and preview them.
image_test_bird = image_test[image_test['label'] == 'bird']
image_test_bird.head()


Out[37]:
id image label deep_features image_array
25 Height: 32 Width: 32 bird [0.0, 0.317288756371,
0.0, 1.36552882195, ...
[100.0, 103.0, 74.0,
68.0, 91.0, 65.0, 116.0, ...
35 Height: 32 Width: 32 bird [0.778077363968, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[66.0, 73.0, 84.0, 66.0,
71.0, 81.0, 64.0, 67.0, ...
65 Height: 32 Width: 32 bird [0.888774394989, 0.0,
0.0, 1.24411165714, ...
[201.0, 206.0, 166.0,
187.0, 180.0, 132.0, ...
67 Height: 32 Width: 32 bird [0.315794527531, 0.0,
0.0, 0.586381316185, ...
[76.0, 170.0, 228.0,
77.0, 171.0, 225.0, 8 ...
70 Height: 32 Width: 32 bird [1.34134876728, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[193.0, 181.0, 145.0,
181.0, 172.0, 147.0, ...
75 Height: 32 Width: 32 bird [1.92161560059, 0.0, 0.0,
0.0, 0.905619382858, ...
[63.0, 111.0, 53.0, 63.0,
110.0, 53.0, 65.0, 11 ...
84 Height: 32 Width: 32 bird [0.472827553749, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[233.0, 231.0, 230.0,
226.0, 225.0, 223.0, ...
86 Height: 32 Width: 32 bird [0.0, 0.0, 0.0, 0.0, 0.0,
0.0170202255249, ...
[116.0, 167.0, 136.0,
110.0, 167.0, 139.0, ...
113 Height: 32 Width: 32 bird [1.47401452065, 0.0, 0.0,
0.219570279121, ...
[114.0, 117.0, 122.0,
117.0, 120.0, 125.0, ...
118 Height: 32 Width: 32 bird [0.0, 0.0, 0.0, 0.0, 0.0,
0.465785324574, ...
[4.0, 4.0, 2.0, 2.0, 2.0,
2.0, 3.0, 4.0, 4.0, 4.0, ...
[10 rows x 5 columns]


In [64]:
# For every dog test image, find its single nearest neighbour (k=1) in each
# of the four per-class training models.
dog_cat_neighbors = knn_model_cat.query(image_test_dog, k=1)
dog_dog_neighbors = knn_model_dog.query(image_test_dog, k=1)
dog_bird_neighbors = knn_model_bird.query(image_test_dog, k=1)
dog_auto_neighbors = knn_model_auto.query(image_test_dog, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 379.238ms    |
PROGRESS: | Done         | 509000  | 100         | 456.605ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 128000  | 25.1473     | 388.754ms    |
PROGRESS: | Done         | 509000  | 100         | 429.744ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 119000  | 24.8954     | 359.367ms    |
PROGRESS: | Done         | 478000  | 100         | 389.583ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 356.908ms    |
PROGRESS: | Done         | 509000  | 100         | 424.848ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [66]:
# Put the four nearest-neighbour distances side by side, one row per dog
# test image and one column per training class.
# NOTE(review): this assumes all four query results list the query images in
# the same row order, so that row i of every column refers to the same test
# image -- confirm against the nearest-neighbours query documentation.
dog_distances = graphlab.SFrame({'dog-dog': dog_dog_neighbors['distance'],
                                 'dog-cat': dog_cat_neighbors['distance'],
                                 'dog-bird': dog_bird_neighbors['distance'],
                                 'dog-auto': dog_auto_neighbors['distance']})
dog_distances.head()


Out[66]:
dog-auto dog-bird dog-cat dog-dog
41.9579761457 41.7538647304 36.4196077068 33.4773590373
46.0021331807 41.3382958925 38.8353268874 32.8458495684
42.9462290692 38.6157590853 36.9763410854 35.0397073189
41.6866060048 37.0892269954 34.5750072914 33.9010327697
39.2269664935 38.272288694 34.778824791 37.4849250909
40.5845117698 39.1462089236 35.1171578292 34.945165344
45.1067352961 40.523040106 40.6095830913 39.0957278345
41.3221140974 38.1947918393 39.9036867306 37.7696131032
41.8244654995 40.1567131661 38.0674700168 35.1089144603
45.4976929401 45.5597962603 42.7258732951 43.2422832585
[10 rows x 4 columns]


In [69]:
def is_dog_correct(row):
    """Return 1 if no other class is strictly closer than the dog class, else 0.

    row -- mapping with the distance columns 'dog-dog', 'dog-auto',
           'dog-bird' and 'dog-cat'.

    A tie with 'dog-dog' still counts as correct: only a strictly smaller
    rival distance makes the row incorrect.
    """
    rival_columns = ('dog-auto', 'dog-bird', 'dog-cat')
    beaten = any(row[rival] < row['dog-dog'] for rival in rival_columns)
    return 0 if beaten else 1

In [70]:
# Count the dog test images for which no other class has a strictly closer
# nearest neighbour than the dog model (is_dog_correct returns 1 for those).
dog_distances.apply(is_dog_correct).sum()


Out[70]:
678

In [71]:
# Total number of dog test images, i.e. the denominator for the count above.
len(dog_distances)


Out[71]:
1000