In [1]:
import graphlab

Load CIFAR-10 dataset


In [2]:
# Load the training images from the local 'image_train_data_2/' directory.
# Each row carries: id, image, label, deep_features and image_array.
image_train = graphlab.SFrame('image_train_data_2/')


[INFO] This non-commercial license of GraphLab Create is assigned to <redacted-email> and will expire on September 26, 2016. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-7626 - Server binary: /usr/local/lib/python2.7/dist-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1449378909.log
[INFO] GraphLab Server Version: 1.6.1

In [3]:
image_train.head()


Out[3]:
id image label deep_features image_array
24 Height: 32 Width: 32 bird [0.242871761322,
1.09545373917, 0.0, ...
[73.0, 77.0, 58.0, 71.0,
68.0, 50.0, 77.0, 69.0, ...
33 Height: 32 Width: 32 cat [0.525087952614, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[7.0, 5.0, 8.0, 7.0, 5.0,
8.0, 5.0, 4.0, 6.0, 7.0, ...
36 Height: 32 Width: 32 cat [0.566015958786, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[169.0, 122.0, 65.0,
131.0, 108.0, 75.0, ...
70 Height: 32 Width: 32 dog [1.12979578972, 0.0, 0.0,
0.778194487095, 0.0, ...
[154.0, 179.0, 152.0,
159.0, 183.0, 157.0, ...
90 Height: 32 Width: 32 bird [1.71786928177, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[216.0, 195.0, 180.0,
201.0, 178.0, 160.0, ...
97 Height: 32 Width: 32 automobile [1.57818555832, 0.0, 0.0,
0.0, 0.0, 0.0, ...
[33.0, 44.0, 27.0, 29.0,
44.0, 31.0, 32.0, 45.0, ...
107 Height: 32 Width: 32 dog [0.0, 0.0,
0.220677852631, 0.0, ...
[97.0, 51.0, 31.0, 104.0,
58.0, 38.0, 107.0, 61.0, ...
121 Height: 32 Width: 32 bird [0.0, 0.23753464222, 0.0,
0.0, 0.0, 0.0, ...
[93.0, 96.0, 88.0, 102.0,
106.0, 97.0, 117.0, ...
136 Height: 32 Width: 32 automobile [0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 7.5737862587, 0.0, ...
[35.0, 59.0, 53.0, 36.0,
56.0, 56.0, 42.0, 62.0, ...
138 Height: 32 Width: 32 bird [0.658935725689, 0.0,
0.0, 0.0, 0.0, 0.0, ...
[205.0, 193.0, 195.0,
200.0, 187.0, 193.0, ...
[10 rows x 5 columns]

Creating nearest neighbors model for retrieving images using deep features


In [4]:
# Nearest-neighbors model over all training images, compared on their
# 'deep_features' vectors. label='id' makes query results report the image id
# of each neighbor in the 'reference_label' column.
knn_model = graphlab.nearest_neighbors.create(image_train,
                                             features=['deep_features'],
                                             label='id')


PROGRESS: Starting brute force nearest neighbors model training.

Using knn model to find similar images


In [5]:
# Take row 18 as a one-row slice (not image_train[18]) so the query point
# stays an SFrame, which is what gets passed to knn_model.query below.
cat = image_train[18:19]

In [6]:
graphlab.canvas.set_target('ipynb')

In [7]:
cat['image'].show()



In [8]:
knn_model.query(cat)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 10.652ms     |
PROGRESS: | Done         |         | 100         | 186.896ms    |
PROGRESS: +--------------+---------+-------------+--------------+
Out[8]:
query_label reference_label distance rank
0 384 0.0 1
0 6910 36.9403137951 2
0 39777 38.4634888975 3
0 36870 39.7559623119 4
0 41734 39.7866014148 5
[5 rows x 4 columns]


In [10]:
def get_images_from_ids(query_result):
    """Return the training rows referenced by a nearest-neighbors query result.

    Parameters
    ----------
    query_result
        Object with a 'reference_label' column holding ids of rows in the
        global ``image_train`` (the models in this notebook are created with
        label='id', so their query results have this form).

    Returns
    -------
    The rows of ``image_train`` whose 'id' appears among those labels.

    NOTE(review): the rows come back in whatever order ``filter_by`` yields,
    which is not necessarily the neighbor rank order -- confirm before relying
    on the display order.
    """
    neighbor_ids = query_result['reference_label']
    return image_train.filter_by(neighbor_ids, 'id')

In [11]:
cat_neighbors = get_images_from_ids(knn_model.query(cat))


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 8.325ms      |
PROGRESS: | Done         |         | 100         | 187.995ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [12]:
cat_neighbors['image'].show()



In [13]:
car = image_train[8:9]

In [14]:
car['image'].show()



In [15]:
get_images_from_ids(knn_model.query(car))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 14.863ms     |
PROGRESS: | Done         |         | 100         | 185.29ms     |
PROGRESS: +--------------+---------+-------------+--------------+

Create a helper to find and show the nearest-neighbor images of a training image


In [16]:
def show_neighbors(i):
    """Display the nearest training-set neighbors of training image row ``i``.

    Parameters
    ----------
    i : int
        Positional row index into the global ``image_train``.

    Returns
    -------
    Whatever ``.show()`` on the neighbors' 'image' column returns.
    """
    # Slice instead of indexing so the query point stays a one-row SFrame.
    query_row = image_train[i:i+1]
    neighbors = get_images_from_ids(knn_model.query(query_row))
    return neighbors['image'].show()

In [17]:
show_neighbors(8)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 9.391ms      |
PROGRESS: | Done         |         | 100         | 182.124ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [18]:
show_neighbors(26)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 11.755ms     |
PROGRESS: | Done         |         | 100         | 196.706ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [19]:
show_neighbors(122)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 9.103ms      |
PROGRESS: | Done         |         | 100         | 190.016ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [20]:
show_neighbors(1222)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 8.623ms      |
PROGRESS: | Done         |         | 100         | 181.369ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [21]:
show_neighbors(2000)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.0498753   | 10.369ms     |
PROGRESS: | Done         |         | 100         | 183.209ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [23]:
image_train['label'].sketch_summary()


Out[23]:
+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+

In [30]:
# Keep only the training rows labelled 'dog' (boolean-mask filter on 'label').
dog_sframe = image_train[image_train['label'] == 'dog']

In [31]:
dog_sframe.show()



In [32]:
len(dog_sframe)


Out[32]:
509

In [33]:
cat_sframe = image_train[image_train['label'] == 'cat']

In [34]:
bird_sframe = image_train[image_train['label'] == 'bird']

In [35]:
automobile_sframe = image_train[image_train['label'] == 'automobile']

In [38]:
len(cat_sframe), len(bird_sframe), len(automobile_sframe), len(dog_sframe)


Out[38]:
(509, 478, 509, 509)

In [39]:
cat_sframe.show()



In [40]:
dog_sframe.show()



In [41]:
bird_sframe.show()



In [42]:
automobile_sframe.show()



In [43]:
# Per-category model: nearest neighbors restricted to the 'dog' training
# images, again compared on 'deep_features' and labelled by image id.
dog_model = graphlab.nearest_neighbors.create(dog_sframe,
                                             features=['deep_features'],
                                             label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [44]:
cat_model = graphlab.nearest_neighbors.create(cat_sframe,
                                             features=['deep_features'],
                                             label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [45]:
bird_model = graphlab.nearest_neighbors.create(bird_sframe,
                                             features=['deep_features'],
                                             label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [46]:
automobile_model = graphlab.nearest_neighbors.create(automobile_sframe,
                                             features=['deep_features'],
                                             label='id')


PROGRESS: Starting brute force nearest neighbors model training.

In [47]:
# Load the test images from the local 'image_test_data_2/' directory; these
# rows are used only as query points against the per-category models.
image_test = graphlab.SFrame('image_test_data_2/')

In [48]:
image_test[0:1]['image'].show()



In [49]:
catQ = image_test[0:1]

In [50]:
catQ


Out[50]:
id image label deep_features image_array
0 Height: 32 Width: 32 cat [1.13469004631, 0.0, 0.0,
0.0, 0.0366497635841, ...
[158.0, 112.0, 49.0,
159.0, 111.0, 47.0, ...
[1 rows x 5 columns]


In [51]:
cat_model.query(catQ)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 10.142ms     |
PROGRESS: | Done         |         | 100         | 61.414ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[51]:
query_label reference_label distance rank
0 16289 34.623719208 1
0 45646 36.0068799284 2
0 32139 36.5200813436 3
0 25713 36.7548502521 4
0 331 36.8731228168 5
[5 rows x 4 columns]


In [54]:
get_images_from_ids(cat_model.query(catQ))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 12.116ms     |
PROGRESS: | Done         |         | 100         | 57.133ms     |
PROGRESS: +--------------+---------+-------------+--------------+

In [58]:
# Show the top-ranked cat neighbor of catQ (reference_label 16289 in the
# cat_model.query(catQ) result). cat_model was built from image_train rows, so
# the id must be looked up in image_train, not image_test.
# The filter has to be a boolean mask over the 'id' column: writing
# frame['id' == 16289] compares the string 'id' with a number in plain Python,
# which is just False, so no filtering by id would happen.
image_train[image_train['id'] == 16289]['image'].show()

In [59]:
graphlab.canvas.set_target('ipynb')

In [62]:
get_images_from_ids(dog_model.query(catQ))['image'].show()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 16.722ms     |
PROGRESS: | Done         |         | 100         | 64.537ms     |
PROGRESS: +--------------+---------+-------------+--------------+

In [63]:
dog_model.query(catQ)


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 6.604ms      |
PROGRESS: | Done         |         | 100         | 57.57ms      |
PROGRESS: +--------------+---------+-------------+--------------+
Out[63]:
query_label reference_label distance rank
0 16976 37.4642628784 1
0 13387 37.5666832169 2
0 35867 37.6047267079 3
0 44603 37.7065585153 4
0 6094 38.5113254907 5
[5 rows x 4 columns]


In [67]:
dog_model.query(catQ)['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 13.196ms     |
PROGRESS: | Done         |         | 100         | 59.689ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[67]:
37.77071136184156

In [68]:
cat_model.query(catQ)['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 8.477ms      |
PROGRESS: | Done         |         | 100         | 59.021ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[68]:
36.15573070978294

In [69]:
dog_test = image_test[image_test['label'] == 'dog']

In [70]:
cat_test = image_test[image_test['label'] == 'cat']

In [71]:
bird_test = image_test[image_test['label'] == 'bird']

In [72]:
automobile_test = image_test[image_test['label'] == 'automobile']

In [73]:
# For every dog test image, find its single nearest neighbor (k=1) among the
# cat training images; 'distance' is then the dog-to-closest-cat distance.
dog_cat_neighbors = cat_model.query(dog_test, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 304.923ms    |
PROGRESS: | Done         | 509000  | 100         | 389.945ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [74]:
dog_cat_neighbors.head()


Out[74]:
query_label reference_label distance rank
0 33 36.4196077068 1
1 30606 38.8353268874 1
2 5545 36.9763410854 1
3 19631 34.5750072914 1
4 7493 34.778824791 1
5 47044 35.1171578292 1
6 13918 40.6095830913 1
7 10981 39.9036867306 1
8 45456 38.0674700168 1
9 44673 42.7258732951 1
[10 rows x 4 columns]


In [75]:
dog_bird_neighbors = bird_model.query(dog_test, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 119000  | 24.8954     | 301.652ms    |
PROGRESS: | Done         | 478000  | 100         | 308.021ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [76]:
dog_dog_neighbors = dog_model.query(dog_test, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 329.728ms    |
PROGRESS: | Done         | 509000  | 100         | 351.276ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [77]:
dog_automobile_neighbors = automobile_model.query(dog_test, k=1)


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 128000  | 25.1473     | 305.669ms    |
PROGRESS: | Done         | 509000  | 100         | 339.556ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [78]:
# One row per dog test image, one column per category model: the distance to
# the closest training image of that category.
# NOTE(review): the four 'distance' columns are combined positionally, which
# assumes every query result lists the dog_test rows in the same query_label
# order -- confirm (or join on 'query_label') before trusting row alignment.
new_sframe = graphlab.SFrame({'dog-automobile': dog_automobile_neighbors['distance'],
                             'dog-bird': dog_bird_neighbors['distance'],
                             'dog-cat': dog_cat_neighbors['distance'],
                             'dog-dog': dog_dog_neighbors['distance']})

In [79]:
new_sframe.head()


Out[79]:
dog-automobile dog-bird dog-cat dog-dog
41.9579761457 41.7538647304 36.4196077068 33.4773590373
46.0021331807 41.3382958925 38.8353268874 32.8458495684
42.9462290692 38.6157590853 36.9763410854 35.0397073189
41.6866060048 37.0892269954 34.5750072914 33.9010327697
39.2269664935 38.272288694 34.778824791 37.4849250909
40.5845117698 39.1462089236 35.1171578292 34.945165344
45.1067352961 40.523040106 40.6095830913 39.0957278345
41.3221140974 38.1947918393 39.9036867306 37.7696131032
41.8244654995 40.1567131661 38.0674700168 35.1089144603
45.4976929401 45.5597962603 42.7258732951 43.2422832585
[10 rows x 4 columns]


In [80]:
# Give the combined distance table a descriptive name. This is an alias, not a
# copy: dog_distance and new_sframe refer to the same SFrame object.
dog_distance= new_sframe

In [83]:
dog_distance[0:1]


Out[83]:
dog-automobile dog-bird dog-cat dog-dog
41.9579761457 41.7538647304 36.4196077068 33.4773590373
[1 rows x 4 columns]


In [85]:
def is_dog_correct(row):
    """Return 1 if the dog model gave the strictly smallest distance, else 0.

    Parameters
    ----------
    row : mapping
        Must provide the keys 'dog-dog', 'dog-automobile', 'dog-bird' and
        'dog-cat', each holding a distance.

    Returns
    -------
    int
        1 when row['dog-dog'] is strictly less than each of the other three
        distances; 0 otherwise (a tie counts as incorrect).
    """
    dog_dist = row['dog-dog']
    rival_columns = ('dog-automobile', 'dog-bird', 'dog-cat')
    # all() stops at the first failing comparison, checking the rivals in the
    # order automobile, bird, cat.
    dog_is_closest = all(dog_dist < row[col] for col in rival_columns)
    return 1 if dog_is_closest else 0

In [86]:
# is_dog_correct yields 1 or 0 per row, so the sum is the number of dog test
# images whose closest training image is a dog; divide by len(dog_test) to get
# the accuracy.
dog_distance.apply(is_dog_correct).sum()


Out[86]:
678

In [87]:
len(dog_test)


Out[87]:
1000

In [ ]: