In [34]:
import numpy as np
import h5py
from PIL import Image
import matplotlib.pyplot as plt
import time
import lmdb
from caffe.proto import caffe_pb2
import caffe
import matplotlib.pyplot as plt
import sys
import tools.my_io as my_io
%matplotlib inline


MODEL_FILE = '/media/raid_arr/data/ndsb/config/deploy_cnn_v3_maxout_supersparse.prototxt'
PRETRAINED = '/media/raid_arr/data/ndsb/models/zoomed_out_vanilla_smallmaxout/simple_fold0_iter_3000.caffemodel'

MEAN_VALUE = 23

LAYER = 'maxfc7'

# N_MBATCH = 1000
N = 10000   # Chunk size

IMAGE_FILE = '/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/acantharia_protist/100224.jpg'
TRAIN_DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
VAL_DB = '/media/raid_arr/tmp/test0_norm_lmdb'

TRAIN_FEAT_OUT = '/media/raid_arr/data/ndsb/features/train_cnnv3_maxout_noaug.hdf5'
VAL_FEAT_OUT = '/media/raid_arr/data/ndsb/features/val_cnnv3_maxout_noaug.hdf5'

In [3]:
# Loading From Database
print 'Loading data...'
tic = time.time()
data = my_io.load_lmdb(TEST_DB)
print "Done in %.2f s." % (time.time() - tic)

val_files_all, images, labels = zip(*data)
test_labels = labels


Loading data...
Done in 2.40 s.

In [7]:
image_dims = images[0].shape[:2]
# image_dims = (57, 57)
print image_dims

net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=np.array([MEAN_VALUE]),
                       raw_scale=1.0,    # 255 if load from caffe.io, 1.0 if load from my_io lmdb
                       image_dims=image_dims,)
#                        gpu=True)
# caffe.set_phase_test()
caffe.set_mode_gpu()

n_feats = net.blobs[LAYER].data.shape[1]


(64, 64)

In [32]:
# im = caffe.io.load_image(TEST_IM, color=False)
print 'Layer Shapes:'
for k, v in net.blobs.items():
    print k, v.data.shape


Layer Shapes:
data (10, 1, 57, 57)
conv1 (10, 48, 26, 26)
pool1 (10, 48, 25, 25)
conv2 (10, 96, 21, 21)
pool2 (10, 96, 20, 20)
conv3 (10, 128, 20, 20)
conv4 (10, 128, 20, 20)
convstack_top (10, 128, 10, 10)
fc6 (10, 2048, 1, 1)
s1fc6 (10, 512, 1, 1)
s2fc6 (10, 512, 1, 1)
s3fc6 (10, 512, 1, 1)
s4fc6 (10, 512, 1, 1)
maxfc6 (10, 512, 1, 1)
fc7 (10, 2048, 1, 1)
s1fc7 (10, 512, 1, 1)
s2fc7 (10, 512, 1, 1)
s3fc7 (10, 512, 1, 1)
s4fc7 (10, 512, 1, 1)
maxfc7 (10, 512, 1, 1)
0fc8 (10, 121, 1, 1)
loss (10, 121, 1, 1)

In [36]:
def get_net_feats(db_data_in, 
                  db_feat_out,
                  layer):
    # Create new h5 file
    try:
      f.close()
    except NameError:
      print 'Opening new db at:', db_feat_out

    f = h5py.File(db_feat_out, 'w')


    # Make Groups
    feat_db = f.create_dataset("feats", shape=(N, n_feats), maxshape=(None, n_feats), dtype='f')
    lbls_db = f.create_dataset("lbls", shape=(N,), maxshape=(None,), dtype='i8')
    impaths_db = f.create_dataset("im_paths", shape=(N,), maxshape=(None,), dtype='S120')


    # PREDICTION TIME
    print 'Predicting...', db_data_in
    prediction_list = []
    test_files_list = []
    next_key = ''
    first_run = True
    while next_key or first_run:
        print 'Starting at key: ', next_key
        read_start = time.time()
        data_chunk, next_key = my_io.load_lmdb_chunk(db_data_in, next_key, N)
        print "Read done in %.2f s." % (time.time() - read_start)
        chunk_len = len(data_chunk)
        print 'Chunk size:', chunk_len
        sys.stdout.flush()
        pred_start = time.time()

        print 'Propagating chunks through net...'
        sys.stdout.flush()
        im_paths = []
        feats = []
        lbls = []
        if not first_run:
            # After the first chunk, we need to resize the db
            feat_db.resize(feat_db.shape[0] + chunk_len, axis=0)
            lbls_db.resize(lbls_db.shape[0] + chunk_len, axis=0)
            impaths_db.resize(impaths_db.shape[0] + chunk_len, axis=0)
        for ii, (im_path, im, lbl) in enumerate(data_chunk):
            prediction = net.predict([im])
            feat = np.squeeze(net.blobs[layer].data.mean(0))
            feats.append(feat)
            lbls.append(lbl)
            im_paths.append(im_path)
        feat_db[-chunk_len:] = np.array(feats)
        lbls_db[-chunk_len:] = np.array(lbls)
        impaths_db[-chunk_len:] = np.array(im_paths)


    #     im_path_chunk, images_chunk, labels_chunk = zip(*data_chunk)
    #     prediction = net.predict(images_chunk)
    #     prediction_list.append(prediction)
    #     test_files_list.append(test_files_chunk)
        print "Pred done in %.2f s." % (time.time() - pred_start)
        sys.stdout.flush()
        first_run = False

    # predictions = np.concatenate(prediction_list)
    # test_files = list(itertools.chain(*test_files_list))
    print "Done predicting"
    print 'DB saved in:', db_feat_out
    return

In [37]:
start = time.time()
# prediction = net.predict(images)
get_net_feats(db_data_in=TEST_DB, 
                  db_feat_out=TEST_FEAT_OUT,
                  layer=LAYER)

print "Done in %.2f s." % (time.time() - start)


Opening new db at: /media/raid_arr/data/ndsb/features/test_cnnv3_maxout_noaug.hdf5
Predicting... /media/raid_arr/tmp/test0_norm_lmdb
Starting at key:  
Read done in 1.78 s.
Chunk size: 6115
Pred done in 252.88 s.
Done predicting
Done in 254.67 s.

In [ ]:
start = time.time()
# prediction = net.predict(images)
get_net_feats(db_data_in=TRAIN_DB, 
                  db_feat_out=TRAIN_FEAT_OUT,
                  layer=LAYER)

print "Done in %.2f s." % (time.time() - start)


Opening new db at: /media/raid_arr/data/ndsb/features/train_cnnv3_maxout_noaug.hdf5
Predicting... /media/raid_arr/tmp/train0_norm_lmdb/
Starting at key:  
Read done in 2.62 s.
Chunk size: 10000

In [38]:
y = np.array(y)
feats_arr = np.array(feats)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-59abedf0ad6d> in <module>()
----> 1 y = np.array(y)
      2 feats_arr = np.array(feats)

NameError: name 'y' is not defined

In [66]:
from sklearn import svm
from sklearn import cross_validation
clf = svm.SVC()

scores = cross_validation.cross_val_score(clf, feats_arr, y, cv=5)


/usr/lib/python2.7/site-packages/sklearn/cross_validation.py:413: Warning: The least populated class in y has only 2 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=5.
  % (min_labels, self.n_folds)), Warning)

In [209]:
f.keys()


Out[209]:
[u'fc1', u'fc2', u'im_paths', u'lbls']

In [21]:
feats_arr = np.array(feats)
feats_arr


Out[21]:
array([[-0.5334543 , -1.45886266, -1.85120237, ..., -0.41519433,
        -0.2006965 , -0.43864995],
       [-0.18853374, -0.69259894,  0.75199413, ..., -1.80381894,
         0.55619729,  0.18945086],
       [-1.68894136,  2.09158039,  0.10232283, ...,  1.80399609,
         4.05035686, -0.01457913],
       ..., 
       [-2.45586085, -1.8641001 ,  1.90774083, ..., -2.60364795,
         1.41474581, -0.11888784],
       [ 3.58840704, -1.30984402, -0.91113883, ..., -2.98078585,
         3.51869011,  7.82888508],
       [ 1.53750145,  0.28915527,  4.71641016, ...,  4.58173275,
         0.31663072,  3.23718071]], dtype=float32)

In [30]:
feats_arr.shape


Out[30]:
(6115, 512)