In [1]:
import graphlab
In [2]:
# Cap the number of pylambda worker processes: fewer workers use less system
# memory, which keeps hosted notebooks from crashing.
PYLAMBDA_WORKER_LIMIT = 4
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', PYLAMBDA_WORKER_LIMIT)
In [3]:
# Training set, loaded from an SFrame directory given as a relative path.
TRAIN_DATA_DIR = 'image_train_data/'
image_train = graphlab.SFrame(TRAIN_DATA_DIR)
In [4]:
# Test set, loaded from an SFrame directory given as a relative path.
TEST_DATA_DIR = 'image_test_data/'
image_test = graphlab.SFrame(TEST_DATA_DIR)
In [5]:
# Preview the first few rows of the training data to see which columns are available.
image_train.head()
Out[5]:
In [6]:
# Summarise the 'label' column of the training data.
image_train['label'].sketch_summary()
Out[6]:
In [7]:
def label_filter(l, frame=None):
    """Return the rows of a table whose 'label' column equals ``l``.

    Parameters
    ----------
    l : label value to keep (for example 'cat').
    frame : table to filter; anything supporting ``frame[frame['label'] == l]``.
        Defaults to the global ``image_train``, so one-argument calls behave
        exactly as before.

    Returns
    -------
    The subset of ``frame`` whose 'label' equals ``l``.
    """
    if frame is None:
        # Original behaviour: filter the training set.
        frame = image_train
    return frame[frame['label'] == l]
In [8]:
# Training images labelled 'automobile', and how many there are.
image_train_auto = label_filter('automobile')
len(image_train_auto)
Out[8]:
In [9]:
# Training images labelled 'cat', and how many there are.
image_train_cat = label_filter('cat')
len(image_train_cat)
Out[9]:
In [10]:
# Training images labelled 'dog', and how many there are.
image_train_dog = label_filter('dog')
len(image_train_dog)
Out[10]:
In [11]:
# Training images labelled 'bird', and how many there are.
image_train_bird = label_filter('bird')
len(image_train_bird)
Out[11]:
In [12]:
def _build_neighbor_index(category_frame):
    """Create a nearest-neighbours model over one category's 'deep_features', labelled by 'id'."""
    return graphlab.nearest_neighbors.create(
        category_frame,
        features=['deep_features'],
        label='id',
    )


# One nearest-neighbours model per category, each built only from that
# category's training images.
auto_model = _build_neighbor_index(image_train_auto)
cat_model = _build_neighbor_index(image_train_cat)
dog_model = _build_neighbor_index(image_train_dog)
bird_model = _build_neighbor_index(image_train_bird)
In [13]:
def get_images_from_ids(query_result):
    """Fetch the training rows referenced by a nearest-neighbours query result.

    Parameters
    ----------
    query_result : table with a 'reference_label' column holding 'id' values
        of rows in the global ``image_train``.

    Returns
    -------
    The rows of ``image_train`` whose 'id' appears in
    ``query_result['reference_label']``.
    """
    return image_train.filter_by(query_result['reference_label'], 'id')
In [14]:
def show_neighbours(i, model=None):
    """Display the training images nearest to training image number ``i``.

    Parameters
    ----------
    i : integer row position in ``image_train``; the one-row slice
        ``image_train[i:i+1]`` is used as the query.
    model : nearest-neighbours model to query. Defaults to the global
        ``knn_model`` (the original behaviour). NOTE: no visible cell of this
        notebook defines ``knn_model``, so either define it first or pass a
        model explicitly (e.g. ``cat_model``).

    Returns
    -------
    Whatever ``.show()`` returns for the matched 'image' column.
    """
    if model is None:
        # Fall back to the global the original lambda relied on.
        model = knn_model
    neighbours = model.query(image_train[i:i+1])
    return get_images_from_ids(neighbours)['image'].show()
In [15]:
# Show the first test image, which is the query image for the questions below.
# NOTE(review): this runs before graphlab.canvas.set_target('ipynb') in the
# next cell, so it may not render inline in the notebook -- confirm.
image_test[0:1]['image'].show()
In [16]:
# Point GraphLab Canvas at the 'ipynb' target for subsequent .show() calls.
# NOTE(review): consider moving this into the setup cells, ahead of the first .show().
graphlab.canvas.set_target('ipynb')
In [17]:
# Show the first test image again, now that the canvas target is 'ipynb'.
image_test[0:1]['image'].show()
What is the nearest ‘cat’ labeled image in the training data to the cat image above (the first image in the test data)? Save this result.
In [71]:
# Query the cat-only model with the first test image to list its nearest 'cat' training images.
cat_model.query(image_test[0:1])
Out[71]:
In [72]:
# Show the 'cat' training image closest to the first test image.
# The id comes from the query result (k=1 keeps only the single nearest
# neighbour) instead of being copied by hand from an earlier cell's output,
# so the cell stays correct if the data or model changes.
nearest_cat = cat_model.query(image_test[0:1], k=1)
image_train_cat.filter_by(nearest_cat['reference_label'], 'id')['image'].show()
What is the nearest ‘dog’ labeled image in the training data to the cat image above (the first image in the test data)? Save this result.
In [19]:
# Query the dog-only model with the first test image to list its nearest 'dog' training images.
dog_model.query(image_test[0:1])
Out[19]:
In [73]:
# Show the 'dog' training image closest to the first test image.
# The id comes from the query result (k=1 keeps only the single nearest
# neighbour) instead of being copied by hand from an earlier cell's output,
# so the cell stays correct if the data or model changes.
nearest_dog = dog_model.query(image_test[0:1], k=1)
image_train_dog.filter_by(nearest_dog['reference_label'], 'id')['image'].show()
For the first image in the test data (image_test[0:1]), which we used above, compute the mean distance between this image and its 5 nearest neighbors that were labeled ‘cat’ in the training data (similarly to what you did in the previous question). Save this result.
In [20]:
# Mean distance from the first test image to its nearest 'cat' training images.
# No k is passed, so the query's default neighbour count applies; the exercise
# asks for 5 -- confirm the default matches.
cat_neighbors_of_first = cat_model.query(image_test[0:1])
cat_neighbors_of_first['distance'].mean()
Out[20]:
Similarly, for the first image in the test data (image_test[0:1]), which we used above, compute the mean distance between this image and its 5 nearest neighbors that were labeled ‘dog’ in the training data (similarly to what you did in the previous question). Save this result.
In [21]:
# Mean distance from the first test image to its nearest 'dog' training images.
# No k is passed, so the query's default neighbour count applies; the exercise
# asks for 5 -- confirm the default matches.
dog_neighbors_of_first = dog_model.query(image_test[0:1])
dog_neighbors_of_first['distance'].mean()
Out[21]:
In [22]:
def label_filter_test(l):
    """Return the rows of the global ``image_test`` whose 'label' equals ``l``."""
    return image_test[image_test['label'] == l]


# Split the test set by category and print the size of each subset.
# print(...) with a single argument gives the same output on Python 2 as the
# old print statement, and is also valid Python 3.
image_test_cat = label_filter_test('cat')
print(len(image_test_cat))
image_test_dog = label_filter_test('dog')
print(len(image_test_dog))
image_test_bird = label_filter_test('bird')
print(len(image_test_bird))
image_test_automobile = label_filter_test('automobile')
print(len(image_test_automobile))
In [23]:
# Total number of test images, for comparison with the per-category counts.
# print(...) with a single argument gives the same output on Python 2 as the
# old print statement, and is also valid Python 3.
print(len(image_test))
In [24]:
def _closest_to_test_dogs(category_model):
    """Query ``category_model`` with every test dog image, keeping only the single nearest neighbour (k=1)."""
    return category_model.query(image_test_dog, k=1)


# For each test dog image, the closest training image within each category.
dog_dog_neighbors = _closest_to_test_dogs(dog_model)
dog_cat_neighbors = _closest_to_test_dogs(cat_model)
dog_automobile_neighbors = _closest_to_test_dogs(auto_model)
dog_bird_neighbors = _closest_to_test_dogs(bird_model)
In [26]:
# Gather the nearest-neighbour distance from each category model into one
# table with one column per category.
# NOTE(review): assumes every query result has one row per test dog image, in
# the same row order -- confirm.
distance_columns = {}
distance_columns['dog-dog'] = dog_dog_neighbors['distance']
distance_columns['dog-cat'] = dog_cat_neighbors['distance']
distance_columns['dog-bird'] = dog_bird_neighbors['distance']
distance_columns['dog-automobile'] = dog_automobile_neighbors['distance']
dog_distances = graphlab.SFrame(distance_columns)
In [27]:
# Preview the first few rows of the per-category distance table.
dog_distances.head()
Out[27]:
In [54]:
# 'dog-dog' column of the one-row slice holding the first test dog image.
dog_distances[0:1]['dog-dog']
Out[54]:
In [65]:
def is_dog_correct(r):
    """Tell whether a test dog image is closest to the 'dog' training images.

    Parameters
    ----------
    r : a single row, as a mapping with scalar values under the keys
        'dog-dog', 'dog-cat', 'dog-bird' and 'dog-automobile'.

    Returns
    -------
    True when the 'dog-dog' distance is strictly smaller than each of the
    other three distances; a tie counts as incorrect.
    """
    own_class = r['dog-dog']
    return (own_class < r['dog-cat']
            and own_class < r['dog-bird']
            and own_class < r['dog-automobile'])
In [66]:
# Sanity-check the predicate on the first test dog image.
# is_dog_correct compares scalar values of one row, so index with an integer
# to get a single row rather than passing a one-row slice of the table.
is_dog_correct(dog_distances[0])
In [68]:
# Fraction of test dog images for which is_dog_correct holds.
num_dogs_correct = dog_distances.apply(is_dog_correct).sum()
num_dogs_total = float(len(image_test_dog))  # float() avoids Python 2 integer division
num_dogs_correct / num_dogs_total
Out[68]:
In [62]:
# Display the per-category distance table for the test dog images.
dog_distances
Out[62]:
In [ ]: