In [1]:
import sys
import numpy as np
import h5py
from PIL import Image
import matplotlib.pyplot as plt
import time
import lmdb
from caffe.proto import caffe_pb2
import caffe
import tools.my_io as my_io
%matplotlib inline
PRETRAINED = '/media/raid_arr/data/ndsb/models/pl_iter_56000.caffemodel'
MODEL_FILE = './deploy_deeper.prototxt'
MEAN_FILE = '/media/raid_arr/data/ndsb/augment/testaug_mean.npy'
LAYER = 'fc2'
LAYER2 = 'fc1'
OUTPUT = '/media/raid_arr/data/ndsb/features/pl_56000_feats'
# N_MBATCH = 1000
N = 10000 # Chunk size
TEST_IM = '/media/raid_arr/data/ndsb/augment/train/acantharia_protist/100224_rot0.jpg'
VALIDATION_DB = './data/64x64/ndsb_test_lmdb'
# TRAIN_DB = '/media/raid_arr/data/ndsb/augment/ndsb_trainaug_lmdb/'
TRAIN_DB = '/media/raid_arr/data/ndsb/ndsb_train_lmdb'
TEST_DB = '/data/ndsb/ndsb_test_lmdb'
FEAT_OUT = '/media/raid_arr/data/ndsb/features_test.hdf5'
In [2]:
# image_dims = data[0][1].shape[:2]
caffe.set_mode_gpu()
# caffe.set_phase_test()
image_dims = (64, 64)
crop_dims = np.array([57, 57])
mean = np.load(MEAN_FILE)
print mean.shape
# Resize the mean from image_dims down to the network's crop size
mean_resized = caffe.io.resize_image(mean.transpose((1, 2, 0)), crop_dims).transpose((2, 0, 1))
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=mean_resized,
                       raw_scale=1.0,  # 255 if loading via caffe.io, 1.0 if loading from my_io lmdb
                       image_dims=image_dims)
n_feats2 = net.blobs[LAYER].data.shape[1]
n_feats1 = net.blobs[LAYER2].data.shape[1]
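Note: `caffe.Classifier.predict` runs with `oversample=True` by default, so each image is expanded into mirrored center/corner crops and the fc blobs hold one row per crop; the feature cells below average over axis 0 for exactly that reason. A quick shape check (exact values depend on the deploy batch size):
In [ ]:
# One row per crop in the current batch; feature cells below take .mean(0) over these
print net.blobs['fc2'].data.shape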
In [8]:
# im = caffe.io.load_image(TEST_IM, color=False)
for k, v in net.blobs.items():
    print k, v.data.shape
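`tools.my_io` is a local helper that is not included with this notebook. As a reference, here is a minimal sketch of what `load_lmdb_chunk` is assumed to do (the function name and return convention come from its use below; everything else is an assumption): walk the LMDB from `start_key`, decode up to `n` caffe `Datum` records into `(key, image, label)` tuples, and return the key to resume from, empty once the database is exhausted. Returning HxWxC floats in the 0-255 range would be consistent with `raw_scale=1.0` above.
In [ ]:
# Sketch only -- the real implementation lives in tools/my_io.py
def load_lmdb_chunk_sketch(db_path, start_key, n):
    datum = caffe_pb2.Datum()
    chunk = []
    next_key = ''
    env = lmdb.open(db_path, readonly=True)
    with env.begin() as txn:
        cursor = txn.cursor()
        if cursor.set_range(start_key):  # first key >= start_key ('' -> first record)
            for _ in xrange(n):
                datum.ParseFromString(cursor.value())
                # CxHxW uint8 -> HxWxC float in [0, 255] (assumed; matches raw_scale=1.0)
                im = caffe.io.datum_to_array(datum).transpose((1, 2, 0)).astype(np.float32)
                chunk.append((cursor.key(), im, datum.label))
                if not cursor.next():
                    break  # database exhausted; next_key stays ''
            else:
                next_key = cursor.key()  # resume point for the next call
    env.close()
    return chunk, next_key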
In [219]:
# Create new h5 file (close any handle left over from a previous run)
if 'f' in globals():
    f.close()
f = h5py.File(FEAT_OUT, 'w')
In [220]:
# Create resizable datasets; maxshape=(None, ...) lets axis 0 grow chunk by chunk
fc2_db = f.create_dataset("fc2", shape=(N, n_feats2), maxshape=(None, n_feats2), dtype='f')
fc1_db = f.create_dataset("fc1", shape=(N, n_feats1), maxshape=(None, n_feats1), dtype='f')
lbls_db = f.create_dataset("lbls", shape=(N,), maxshape=(None,), dtype='i8')
impaths_db = f.create_dataset("im_paths", shape=(N,), maxshape=(None,), dtype='S120')
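A quick sanity check (not part of the original run): `maxshape=(None, ...)` makes axis 0 resizable, which the chunked loop below relies on.
In [ ]:
print fc2_db.shape, fc2_db.maxshape  # (10000, n_feats2) and (None, n_feats2)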
In [221]:
# PREDICTION TIME
print 'Predicting...', TEST_DB
next_key = ''
first_run = True
while next_key or first_run:
    print 'Starting at key:', next_key
    read_start = time.time()
    data_chunk, next_key = my_io.load_lmdb_chunk(TEST_DB, next_key, N)
    print "Read done in %.2f s." % (time.time() - read_start)
    chunk_len = len(data_chunk)
    print 'Chunk size:', chunk_len
    sys.stdout.flush()
    pred_start = time.time()
    im_paths = []
    feats_fc2 = []
    feats_fc1 = []
    lbls = []
    if not first_run:
        # Grow each dataset by one chunk; the first chunk fits the initial shape
        fc2_db.resize(fc2_db.shape[0] + chunk_len, axis=0)
        fc1_db.resize(fc1_db.shape[0] + chunk_len, axis=0)
        lbls_db.resize(lbls_db.shape[0] + chunk_len, axis=0)
        impaths_db.resize(impaths_db.shape[0] + chunk_len, axis=0)
    for ii, (im_path, im, lbl) in enumerate(data_chunk):
        prediction = net.predict([im])
        # Average the fc activations over the oversampled crops
        feat_fc2 = np.squeeze(net.blobs['fc2'].data.mean(0))
        feat_fc1 = np.squeeze(net.blobs['fc1'].data.mean(0))
        feats_fc2.append(feat_fc2)
        feats_fc1.append(feat_fc1)
        lbls.append(lbl)
        im_paths.append(im_path)
    # Write the chunk into the tail of each dataset
    fc2_db[-chunk_len:] = np.array(feats_fc2)
    fc1_db[-chunk_len:] = np.array(feats_fc1)
    lbls_db[-chunk_len:] = np.array(lbls)
    impaths_db[-chunk_len:] = np.array(im_paths)
    # Batch alternative: im_paths, images, labels = zip(*data_chunk); net.predict(images)
    print "Pred done in %.2f s." % (time.time() - pred_start)
    sys.stdout.flush()
    first_run = False
print "Done predicting"
In [38]:
# Time a batch prediction (`images` is a list of images loaded earlier, not shown here)
start = time.time()
prediction = net.predict(images)
print "Done in %.2f s." % (time.time() - start)
In [52]:
# `data` is a list of (im_path, im, lbl) tuples loaded earlier (not shown here)
feats = []
y = []
tic = time.time()
for ii, (im_path, im, lbl) in enumerate(data):
    prediction = net.predict([im])
    # fc2 activations averaged over the oversampled crops
    fc2 = np.squeeze(net.blobs['fc2'].data.mean(0))
    feats.append(fc2)
    y.append(lbl)
    if ii % 1000 == 0:
        print ii
print "Done in %.2f s." % (time.time() - tic)
In [64]:
y = np.array(y)
feats_arr = np.array(feats)
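Shape check (not from the original output): `feats_arr` should be `(n_images, n_feats2)` with one label per row.
In [ ]:
print feats_arr.shape, y.shape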
In [66]:
from sklearn import svm
from sklearn import cross_validation
clf = svm.SVC()  # RBF kernel by default
scores = cross_validation.cross_val_score(clf, feats_arr, y, cv=5)
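Summarizing the folds (not part of the original output). One caveat: an RBF-kernel `SVC` is sensitive to feature scale, so standardizing `feats_arr` first would be a reasonable variation.
In [ ]:
print scores
print "CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std())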
In [209]:
f.keys()
In [ ]: