In [1]:
import sys
import time

import numpy as np
import h5py
import lmdb
import matplotlib.pyplot as plt
from PIL import Image

import caffe
from caffe.proto import caffe_pb2
import tools.my_io as my_io
%matplotlib inline


PRETRAINED = '/media/raid_arr/data/ndsb/models/pl_iter_56000.caffemodel'
MODEL_FILE = './deploy_deeper.prototxt'
MEAN_FILE = '/media/raid_arr/data/ndsb/augment/testaug_mean.npy'
LAYER = 'fc2'
LAYER2 = 'fc1'
OUTPUT = '/media/raid_arr/data/ndsb/features/pl_56000_feats'
# N_MBATCH = 1000
N = 10000   # Chunk size
TEST_IM = '/media/raid_arr/data/ndsb/augment/train/acantharia_protist/100224_rot0.jpg'
VALIDATION_DB = './data/64x64/ndsb_test_lmdb'
# TRAIN_DB = '/media/raid_arr/data/ndsb/augment/ndsb_trainaug_lmdb/'
TRAIN_DB = '/media/raid_arr/data/ndsb/ndsb_train_lmdb'
TEST_DB = '/data/ndsb/ndsb_test_lmdb'

FEAT_OUT = '/media/raid_arr/data/ndsb/features_test.hdf5'

In [2]:
caffe.set_mode_gpu()
image_dims = (64, 64)           # images are stored at 64x64
crop_dims = np.array([57, 57])  # network input crop

mean = np.load(MEAN_FILE)
mean_resized = caffe.io.resize_image(mean.transpose((1, 2, 0)), crop_dims).transpose((2, 0, 1))

net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=mean_resized,
                       raw_scale=1.0,  # 255 for caffe.io-loaded images, 1.0 for my_io LMDB data
                       image_dims=image_dims)

n_feats2 = net.blobs['fc2'].data.shape[1]
n_feats1 = net.blobs['fc1'].data.shape[1]
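
A quick shape-only sanity check that the resized mean matches the network's input geometry (a minimal sketch):

In [ ]:
# data blob is (batch, channels, height, width); mean_resized must match the crop dims
print net.blobs['data'].data.shape
print mean_resized.shape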

In [8]:
# im = caffe.io.load_image(TEST_IM, color=False)
for k, v in net.blobs.items():
    print k, v.data.shape


data (10, 1, 57, 57)
conv1 (10, 96, 14, 14)
pool1 (10, 96, 7, 7)
conv2 (10, 256, 7, 7)
pool2 (10, 256, 3, 3)
conv3 (10, 384, 3, 3)
conv4 (10, 384, 3, 3)
conv5 (10, 256, 3, 3)
pool5 (10, 256, 1, 1)
fc1 (10, 2048, 1, 1)
fc2 (10, 2048, 1, 1)
fc3 (10, 121, 1, 1)
prob (10, 121, 1, 1)
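
The learned parameter shapes can be listed the same way via pycaffe's standard `net.params` dict; `params[k][0]` is the weight blob and `params[k][1]` the bias:

In [ ]:
# Weight and bias shapes for each learned layer
for k, v in net.params.items():
    print k, [blob.data.shape for blob in v]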

In [219]:
# Create a new HDF5 file for the extracted features
if 'f' in dir():  # close any handle left over from a previous run
    f.close()
f = h5py.File(FEAT_OUT, 'w')

In [220]:
# Make Groups
fc2_db = f.create_dataset("fc2", shape=(N, n_feats2), maxshape=(None, n_feats2), dtype='f')
fc1_db = f.create_dataset("fc1", shape=(N, n_feats1), maxshape=(None, n_feats1), dtype='f')
lbls_db = f.create_dataset("lbls", shape=(N,), maxshape=(None,), dtype='i8')
impaths_db = f.create_dataset("im_paths", shape=(N,), maxshape=(None,), dtype='S120')
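
The datasets are pre-sized to N rows, so the first chunk is expected to fill them exactly; because each is created with maxshape=(None, ...), axis 0 can then grow, and the prediction loop below relies on this resize-then-write-tail pattern. A minimal standalone sketch (the `demo` dataset is hypothetical and removed afterwards):

In [ ]:
# Grow axis 0 by the chunk length, then write into the new tail
demo = f.create_dataset("demo", shape=(0, 4), maxshape=(None, 4), dtype='f')
chunk = np.random.rand(3, 4).astype('f')
demo.resize(demo.shape[0] + len(chunk), axis=0)
demo[-len(chunk):] = chunk
print demo.shape
del f["demo"]  # drop the demo dataset again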

In [221]:
# PREDICTION: stream the test LMDB in chunks, extract fc1/fc2 features,
# and append them to the resizable HDF5 datasets
print 'Predicting...', TEST_DB
next_key = ''
first_run = True
while next_key or first_run:
    print 'Starting at key: ', next_key
    read_start = time.time()
    data_chunk, next_key = my_io.load_lmdb_chunk(TEST_DB, next_key, N)
    print "Read done in %.2f s." % (time.time() - read_start)
    chunk_len = len(data_chunk)
    print 'Chunk size:', chunk_len
    sys.stdout.flush()
    pred_start = time.time()

    im_paths = []
    feats_fc2 = []
    feats_fc1 = []
    lbls = []
    if not first_run:
        fc2_db.resize(fc2_db.shape[0] + chunk_len, axis=0)
        fc1_db.resize(fc1_db.shape[0] + chunk_len, axis=0)
        lbls_db.resize(lbls_db.shape[0] + chunk_len, axis=0)
        impaths_db.resize(impaths_db.shape[0] + chunk_len, axis=0)
    for ii, (im_path, im, lbl) in enumerate(data_chunk):
        net.predict([im])  # forward pass; blobs now hold the 10 oversampled crops
        feat_fc2 = np.squeeze(net.blobs['fc2'].data.mean(0))  # average features over crops
        feat_fc1 = np.squeeze(net.blobs['fc1'].data.mean(0))
        feats_fc2.append(feat_fc2)
        feats_fc1.append(feat_fc1)
        lbls.append(lbl)
        im_paths.append(im_path)
    fc2_db[-chunk_len:] = np.array(feats_fc2)
    fc1_db[-chunk_len:] = np.array(feats_fc1)
    lbls_db[-chunk_len:] = np.array(lbls)
    impaths_db[-chunk_len:] = np.array(im_paths)

    print "Pred done in %.2f s." % (time.time() - pred_start)
    sys.stdout.flush()
    first_run = False
    
print "Done predicting"


Predicting... /data/ndsb/ndsb_test_lmdb
Starting at key:  
Read done in 1.47 s.
Chunk size: 10000
Pred done in 225.60 s.
Starting at key:  00010000_/data/ndsb/test/131890.jpg
Read done in 1.51 s.
Chunk size: 10000
Pred done in 225.63 s.
Starting at key:  00020000_/data/ndsb/test/143698.jpg
Read done in 1.46 s.
Chunk size: 10000
Pred done in 225.60 s.
Starting at key:  00030000_/data/ndsb/test/127171.jpg
Read done in 1.43 s.
Chunk size: 10000
Pred done in 225.53 s.
Starting at key:  00040000_/data/ndsb/test/9227.jpg
Read done in 1.41 s.
Chunk size: 10000
Pred done in 225.57 s.
Starting at key:  00050000_/data/ndsb/test/132350.jpg
Read done in 1.41 s.
Chunk size: 10000
Pred done in 225.58 s.
Starting at key:  00060000_/data/ndsb/test/74795.jpg
Read done in 1.38 s.
Chunk size: 10000
Pred done in 225.54 s.
Starting at key:  00070000_/data/ndsb/test/135220.jpg
Read done in 1.37 s.
Chunk size: 10000
Pred done in 225.58 s.
Starting at key:  00080000_/data/ndsb/test/56438.jpg
Read done in 1.35 s.
Chunk size: 10000
Pred done in 225.58 s.
Starting at key:  00090000_/data/ndsb/test/45426.jpg
Read done in 1.33 s.
Chunk size: 10000
Pred done in 225.65 s.
Starting at key:  00100000_/data/ndsb/test/64677.jpg
Read done in 1.31 s.
Chunk size: 10000
Pred done in 225.60 s.
Starting at key:  00110000_/data/ndsb/test/111353.jpg
Read done in 1.30 s.
Chunk size: 10000
Pred done in 225.55 s.
Starting at key:  00120000_/data/ndsb/test/145818.jpg
Read done in 1.28 s.
Chunk size: 10000
Pred done in 225.57 s.
Starting at key:  00130000_/data/ndsb/test/153380.jpg
Read done in 0.06 s.
Chunk size: 400
Pred done in 9.03 s.
Done predicting
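
At roughly 22 ms per image, the per-image net.predict calls (10 crops each) dominate the runtime above. If crop averaging is not essential, one possible speed-up, sketched below under that assumption, is to preprocess a whole chunk and push it through pycaffe's forward_all, whose blobs argument collects intermediate activations across batches:

In [ ]:
# Hedged sketch: batched feature extraction, single view per image (no 10-crop averaging)
data_chunk, _ = my_io.load_lmdb_chunk(TEST_DB, '', 100)
caffe_in = np.asarray([net.transformer.preprocess(net.inputs[0], im)
                       for (im_path, im, lbl) in data_chunk])
out = net.forward_all(blobs=['fc1', 'fc2'], **{net.inputs[0]: caffe_in})
print out['fc2'].squeeze().shape  # one feature row per image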

In [38]:
# Batch prediction over `images`, a list loaded in an earlier cell (not shown)
start = time.time()
prediction = net.predict(images)
print "Done in %.2f s." % (time.time() - start)


Done in 131.15 s.

In [52]:
# Extract fc2 features for the labeled set loaded earlier into `data` (cell not shown)
feats = []
y = []
tic = time.time()
for ii, (im_path, im, lbl) in enumerate(data):
    net.predict([im])  # forward pass; blobs now hold the 10 oversampled crops
    fc2 = np.squeeze(net.blobs['fc2'].data.mean(0))  # average over crops
    feats.append(fc2)
    y.append(lbl)
    if ii % 1000 == 0:
        print ii
print "Done in %.2f s." % (time.time() - tic)


0
1000
2000
3000
4000
5000
6000
Done in 136.09 s.

In [64]:
y = np.array(y)
feats_arr = np.array(feats)


In [66]:
from sklearn import svm
from sklearn import cross_validation
clf = svm.SVC()

scores = cross_validation.cross_val_score(clf, feats_arr, y, cv=5)


/usr/lib/python2.7/site-packages/sklearn/cross_validation.py:413: Warning: The least populated class in y has only 2 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=5.
  % (min_labels, self.n_folds)), Warning)
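
The warning means at least one class has only two examples, so some folds cannot contain every class. The scores are still worth summarizing; and since SVMs are scale-sensitive, standardizing the fc2 features inside each fold may help. A sketch using the same old-style cross_validation API as above:

In [ ]:
print "CV accuracy: %.3f (+/- %.3f)" % (scores.mean(), scores.std() * 2)

# Fit the scaler inside each fold via a Pipeline to avoid leakage
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
clf_scaled = Pipeline([('scale', StandardScaler()), ('svc', svm.SVC())])
scores_scaled = cross_validation.cross_val_score(clf_scaled, feats_arr, y, cv=5)
print "Scaled CV accuracy: %.3f" % scores_scaled.mean()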

In [209]:
f.keys()


Out[209]:
[u'fc1', u'fc2', u'im_paths', u'lbls']
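
The datasets read back with plain array slicing, without loading the whole file into memory:

In [ ]:
print f['fc2'].shape, f['fc2'].dtype
print f['im_paths'][:3]  # first three image paths
print f['lbls'][:3]      # matching labels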

In [ ]: