In [1]:
import graphlab

In [2]:
# Load the training data from the local path 'image_train_data/' into an SFrame.
image_train = graphlab.SFrame('image_train_data/')


[INFO] This non-commercial license of GraphLab Create is assigned to zhanglh13@fudan.edu.cn and will expire on September 21, 2016. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-4716 - Server binary: c:\home\courses\machine learning - uw\dato\lib\site-packages\graphlab\unity_server.exe - Server log: C:\Users\linghao\AppData\Local\Temp\graphlab_server_1443778293.log.0
[INFO] GraphLab Server Version: 1.6

In [24]:
# Load the held-out test data from the local path 'image_test_data/' into an SFrame.
image_test = graphlab.SFrame('image_test_data/')

In [6]:
# Summarise the 'label' column of the training set (length, missing values,
# and the most frequent labels with their counts).
image_train['label'].sketch_summary()


Out[6]:
+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+

In [21]:
# Class labels handled by this notebook; the per-class loops iterate this list,
# so its order determines the order of their printed output.
targets = ['bird', 'dog', 'cat', 'automobile']

In [22]:
# Split the training set into one SFrame per class label and report each size.
targets_train_data = {}
for label in targets:
    targets_train_data[label] = image_train[image_train['label'] == label]
    # print() with a single argument produces the same output on Python 2 and
    # Python 3, whereas the bare `print x` statement is a SyntaxError on 3.
    print(len(targets_train_data[label]))


478
509
509
509

In [23]:
# Build one nearest-neighbours model per class label, using 'deep_features' as
# the only feature and the 'id' column to label the reference rows.
targets_model = {
    label: graphlab.nearest_neighbors.create(
        targets_train_data[label], features=['deep_features'], label='id')
    for label in targets
}


PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.
PROGRESS: Starting brute force nearest neighbors model training.

In [67]:
# Send GraphLab Canvas visualisations (used by .show()) to the web browser.
graphlab.canvas.set_target('browser')

In [57]:
def show_image(id):
    """Display the training image(s) whose 'id' column matches ``id``.

    The rows are looked up in the module-level ``image_train`` SFrame and
    ``.show()`` is called on their 'image' column. Nothing is returned.
    """
    matching_rows = image_train.filter_by(id, 'id')
    matching_rows['image'].show()

In [30]:
# Nearest 'cat' training images to the first test image (default number of
# neighbours; the recorded output shows five rows).
targets_model['cat'].query(image_test[0:1])


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 11.001ms     |
PROGRESS: | Done         |         | 100         | 83.005ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[30]:
query_label reference_label distance rank
0 16289 34.623719208 1
0 45646 36.0068799284 2
0 32139 36.5200813436 3
0 25713 36.7548502521 4
0 331 36.8731228168 5
[5 rows x 4 columns]

In [55]:
# Mean distance from the first test image to its 5 nearest 'cat' training images.
# Taking .mean() of the 'distance' column ties the divisor to the number of rows
# actually returned, instead of a hard-coded 5.0 that has to be kept equal to k.
targets_model['cat'].query(image_test[0:1], k=5)['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 17.001ms     |
PROGRESS: | Done         |         | 100         | 78.005ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[55]:
36.15573070978294

In [68]:
# Show the rank-1 'cat' neighbour of the first test image (reference_label 16289).
show_image(16289)


Canvas is accessible via web browser at the URL: http://localhost:52078/index.html
Opening Canvas in default web browser.

In [31]:
# Nearest 'dog' training images to the first test image (default number of
# neighbours; the recorded output shows five rows).
targets_model['dog'].query(image_test[0:1])


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 12.001ms     |
PROGRESS: | Done         |         | 100         | 56.003ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[31]:
query_label reference_label distance rank
0 16976 37.4642628784 1
0 13387 37.5666832169 2
0 35867 37.6047267079 3
0 44603 37.7065585153 4
0 6094 38.5113254907 5
[5 rows x 4 columns]

In [56]:
# Mean distance from the first test image to its 5 nearest 'dog' training images.
# Taking .mean() of the 'distance' column ties the divisor to the number of rows
# actually returned, instead of a hard-coded 5.0 that has to be kept equal to k.
targets_model['dog'].query(image_test[0:1], k=5)['distance'].mean()


PROGRESS: Starting pairwise querying.
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 0            | 1       | 0.196464    | 11.001ms     |
PROGRESS: | Done         |         | 100         | 64.004ms     |
PROGRESS: +--------------+---------+-------------+--------------+
Out[56]:
37.77071136184157

In [69]:
# Show the rank-1 'dog' neighbour of the first test image (reference_label 16976).
show_image(16976)


Canvas is updated and available in a tab in the default browser.

In [33]:
# Split the test set into one SFrame per class label and report each size.
targets_test_data = {}
for label in targets:
    targets_test_data[label] = image_test[image_test['label'] == label]
    # print() with a single argument produces the same output on Python 2 and
    # Python 3, whereas the bare `print x` statement is a SyntaxError on 3.
    print(len(targets_test_data[label]))


1000
1000
1000
1000

In [35]:
# For every dog test image, find its single nearest neighbour (k=1) in each
# per-class model; results are keyed by the class of the model queried.
dog_test_images = targets_test_data['dog']
dog_neighbors = {
    label: targets_model[label].query(dog_test_images, k=1)
    for label in targets
}


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 119000  | 24.8954     | 362.02ms     |
PROGRESS: | Done         | 478000  | 100         | 388.022ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 374.021ms    |
PROGRESS: | Done         | 509000  | 100         | 421.024ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 128000  | 25.1473     | 381.022ms    |
PROGRESS: | Done         | 509000  | 100         | 419.024ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 376.022ms    |
PROGRESS: | Done         | 509000  | 100         | 431.025ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [39]:
# Collect the nearest-neighbour distance column from each per-class result under
# the key 'dog-<class>'.
# NOTE(review): assumes every query result lists the test rows in the same
# order, so that the columns line up row by row — confirm.
dog_dist_gen_dict = {
    'dog-' + label: dog_neighbors[label]['distance']
    for label in targets
}

In [40]:
# One column per 'dog-<class>' key, holding the nearest-neighbour distances.
dogSF = graphlab.SFrame(dog_dist_gen_dict)

In [47]:
def is_dog_correct(row):
    """Return True when the 'dog-dog' entry is the smallest value in ``row``.

    ``row`` is a mapping from column name to distance. A tie between
    'dog-dog' and another column still counts as correct.
    """
    smallest = min(row[key] for key in row)
    return smallest == row['dog-dog']

In [49]:
# Count the rows of dogSF for which is_dog_correct returns True, i.e. the
# 'dog-dog' distance is the row minimum.
dogSF.apply(is_dog_correct).sum()


Out[49]:
678L

In [ ]:


In [50]:
# For every cat test image, find its single nearest neighbour (k=1) in each
# per-class model; results are keyed by the class of the model queried.
cat_test_images = targets_test_data['cat']
cat_neighbors = {
    label: targets_model[label].query(cat_test_images, k=1)
    for label in targets
}


PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 119000  | 24.8954     | 400.023ms    |
PROGRESS: | Done         | 478000  | 100         | 494.028ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 423.024ms    |
PROGRESS: | Done         | 509000  | 100         | 486.028ms    |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 459.027ms    |
PROGRESS: | Done         | 509000  | 100         | 520.03ms     |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: Starting blockwise querying.
PROGRESS: max rows per data block: 7668
PROGRESS: number of reference data blocks: 4
PROGRESS: number of query data blocks: 1
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | Query points | # Pairs | % Complete. | Elapsed Time |
PROGRESS: +--------------+---------+-------------+--------------+
PROGRESS: | 1000         | 127000  | 24.9509     | 382.022ms    |
PROGRESS: | Done         | 509000  | 100         | 430.024ms    |
PROGRESS: +--------------+---------+-------------+--------------+

In [51]:
# Collect the nearest-neighbour distance column from each per-class result under
# the key 'cat-<class>'.
# NOTE(review): assumes every query result lists the test rows in the same
# order, so that the columns line up row by row — confirm.
cat_dist_gen_dict = {
    'cat-' + label: cat_neighbors[label]['distance']
    for label in targets
}

In [52]:
# One column per 'cat-<class>' key, holding the nearest-neighbour distances.
catSF = graphlab.SFrame(cat_dist_gen_dict)

In [53]:
def is_cat_correct(row):
    """Return True when the 'cat-cat' entry is the smallest value in ``row``.

    ``row`` is a mapping from column name to distance. A tie between
    'cat-cat' and another column still counts as correct.
    """
    smallest = min(row[key] for key in row)
    return smallest == row['cat-cat']

In [54]:
# Count the rows of catSF for which is_cat_correct returns True, i.e. the
# 'cat-cat' distance is the row minimum.
catSF.apply(is_cat_correct).sum()


Out[54]:
548L