In [ ]:
# TODO: try predicting the augmented test images as well and
# aggregate the results from all augmentations of one image
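One way to do that aggregation (a sketch only, not run here): if the augmented test LMDB stores the augmented copies of each image as consecutive entries, the class probabilities can simply be averaged per original image. The consecutive-layout assumption and the augmentation count are hypothetical, not taken from the code above.

import numpy as np

def aggregate_augmentations(predictions, n_aug):
    """Average class probabilities over the n_aug augmented copies of each image.

    Assumes predictions has shape (n_images * n_aug, n_classes) and that the
    n_aug rows belonging to one original image are consecutive.
    """
    n_rows, n_classes = predictions.shape
    assert n_rows % n_aug == 0, 'row count must be a multiple of n_aug'
    grouped = predictions.reshape(-1, n_aug, n_classes)  # (n_images, n_aug, n_classes)
    return grouped.mean(axis=1)                          # (n_images, n_classes)

For example, aggregate_augmentations(predictions, 8) if each test image had eight augmented copies (the 8 is only an example count).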

In [95]:
import numpy as np
import time
import sys
import tools.my_io as my_io
import caffe
import itertools
import pickle

# Set the right path to your model definition file, pretrained model weights,
# and the image you would like to classify.
MODEL_FILE = './deploy_vanilla.prototxt'
# PRETRAINED = './models/vanilla/vanilla_iter_20000.caffemodel'
PRETRAINED = '/media/raid_arr/data/ndsb/models/pl_iter_38000.caffemodel'
MEAN_FILE = '/media/raid_arr/data/ndsb/augment/testaug_mean.npy'
# TEST_FILE = './data/test_final.txt'
# TEST_DB = '/data/ndsb/ndsb_test_lmdb'
# TEST_DB = '/data/ndsb/augment/ndsb_testaug_lmdb/'
TEST_DB = './data/64x64/ndsb_test_lmdb'
N = 1000   # Chunk size

In [71]:
# # Loading From Database
# reload(my_io)
# print 'Loading data...'
# tic = time.time()
# data, next_key = my_io.load_lmdb_chunk(TEST_DB, '', 1000000)
# print "Done in %.2f s." % (time.time() - tic)

# # test_files_all, images, labels = zip(*data)
# # test_labels = labels


Loading data...
Done in 0.95 s.
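For reference, a minimal sketch of what my_io.load_lmdb_chunk is assumed to do: scan an LMDB of Caffe Datum records starting at a given key, decode up to n images, and return the records plus the key where the next chunk should resume. The (key, image, label) record layout is an assumption inferred from how the results are unpacked in the prediction loop below; the author's actual helper may differ.

import lmdb
import caffe
from caffe.proto import caffe_pb2

def load_lmdb_chunk_sketch(db_path, start_key, n):
    records = []
    next_key = ''
    env = lmdb.open(db_path, readonly=True, lock=False)
    with env.begin() as txn:
        cursor = txn.cursor()
        # Position the cursor at the requested key, or at the first entry.
        if start_key:
            cursor.set_range(start_key)
        else:
            cursor.first()
        for i, (key, value) in enumerate(cursor):
            if i >= n:
                next_key = key  # the next chunk should resume here
                break
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            img = caffe.io.datum_to_array(datum)          # C x H x W, uint8 (0-255)
            records.append((key, img.transpose(1, 2, 0),  # store as H x W x C
                            datum.label))
    env.close()
    return records, next_key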

In [77]:
# image_dims = data[0][1].shape[:2]
image_dims = (64, 64)
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=np.load(MEAN_FILE),
                       raw_scale=1.0,    # 255 if load from caffe.io, 1.0 if load from my_io lmdb
                       image_dims=image_dims,
                       gpu=True)
caffe.set_phase_test()
caffe.set_mode_gpu()
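The raw_scale argument only matters because of how caffe.Classifier preprocesses its inputs. Roughly (resizing and oversampling/cropping omitted), each image is scaled by raw_scale, transposed to channel-first, and has the mean subtracted, so raw_scale must bring the pixels into the same 0-255 range as the mean file. A rough numpy sketch of that step, assuming H x W x C input arrays:

import numpy as np

def preprocess_sketch(img, mean, raw_scale=1.0):
    # caffe.io.load_image returns floats in [0, 1], so it needs raw_scale=255;
    # images decoded from the LMDB are already 0-255, hence raw_scale=1.0 here.
    x = np.asarray(img, dtype=np.float32) * raw_scale  # bring pixels into the 0-255 range
    x = x.transpose(2, 0, 1)                           # H x W x C -> C x H x W
    return x - mean                                    # mean loaded from MEAN_FILE (C x H x W)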

In [4]:
# # Chunking for the sake of memory
# def chunks(l, n):
#     """ Yield successive n-sized chunks from l.
#     """
#     #for i in xrange(0, len(l), n):
#     #    yield l[i:i+n]
#     return [l[i:i+n] for i in xrange(0, len(l), n)]

# data_chunks = chunks(data, N)
# len(data_chunks)


Out[4]:
7

In [97]:
# PREDICTION TIME
# print 'Predicting...', len(data), 'images'
prediction_list = []
test_files_list = []
next_key = ''
# for ii, data_chunk in enumerate(data_chunks):
first_run = True
while next_key or first_run:
    print 'Starting at key: ', next_key
    
    first_run = False
    data_chunk, next_key = my_io.load_lmdb_chunk(TEST_DB, next_key, N)
    sys.stdout.flush()
    start = time.time()
    test_files_chunk, images_chunk, fake_labels = zip(*data_chunk)
#     prediction = net.predict(images_chunk)  # real forward pass; restore this for actual predictions
    prediction = np.array([1])  # dry-run placeholder so the loop only exercises the chunked LMDB reading
    prediction_list.append(prediction)
    test_files_list.append(test_files_chunk)
    print "Done in %.2f s." % (time.time() - start)
    sys.stdout.flush()
    
predictions = np.concatenate(prediction_list)
test_files = list(itertools.chain(*test_files_list))
print "Done predicting"


Starting at key:  
Done in 0.00 s.
Starting at key:  00001000_/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/diatom_chain_tube/103660.jpg
Done in 0.00 s.
Starting at key:  00002000_/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/copepod_calanoid_large/103657.jpg
Done in 0.00 s.
Starting at key:  00003000_/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/protist_other/112696.jpg
Done in 0.00 s.
Starting at key:  00004000_/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/hydromedusae_shapeA_sideview_small/103688.jpg
Done in 0.00 s.
Starting at key:  00005000_/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/protist_other/103790.jpg
Done in 0.00 s.
Starting at key:  00006000_/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/trichodesmium_puff/110777.jpg
Done in 0.00 s.
Done predicting

In [94]:
# predictions.shape

test_files[-1]


Out[94]:
'/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/tunicate_doliolid_nurse/108008.jpg'

In [ ]:
pickle.dump(predictions, open('/media/raid_arr/data/ndsb/saved_preds/pred.p', 'wb'))
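The saved predictions can then be reloaded in a later session instead of re-running the network (same path as above):

import pickle
# Reload the pickled prediction matrix.
predictions = pickle.load(open('/media/raid_arr/data/ndsb/saved_preds/pred.p', 'rb'))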

In [6]:
# SUBMISSION CREATION
# Use the file names gathered by the chunked prediction loop above;
# the old full in-memory load (`data`) is commented out, so zip(*data) is no longer needed.

import tools.submission as sub
f_name = 'SUBMISSION_PL38000_LEGIT.csv'
sub.make_submission(test_files, predictions, f_name=f_name)

print 'Submission created:', f_name


Submission created: SUBMISSION_PL38000_LEGIT.csv
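tools.submission.make_submission is the author's helper; a hedged sketch of what it is assumed to write, namely the NDSB-style submission CSV with the image file name in the first column and one probability column per class. The class_names argument here is a placeholder; the real helper presumably gets the 121 class names from the training label order.

import csv
import os

def make_submission_sketch(test_files, predictions, class_names, f_name):
    # Header 'image' + class names, then one row per test image with its
    # file name and predicted class probabilities.
    with open(f_name, 'wb') as f:  # 'wb' for the Python 2 csv module
        writer = csv.writer(f)
        writer.writerow(['image'] + list(class_names))
        for path, probs in zip(test_files, predictions):
            writer.writerow([os.path.basename(path)] + ['%.6f' % p for p in probs])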

In [8]:
len(data)


Out[8]:
130400