In [34]:
import numpy as np
import h5py
from PIL import Image
import matplotlib.pyplot as plt
import time
import lmdb
from caffe.proto import caffe_pb2
import caffe
import sys
import tools.my_io as my_io
%matplotlib inline
MODEL_FILE = '/media/raid_arr/data/ndsb/config/deploy_cnn_v3_maxout_supersparse.prototxt'
PRETRAINED = '/media/raid_arr/data/ndsb/models/zoomed_out_vanilla_smallmaxout/simple_fold0_iter_3000.caffemodel'
MEAN_VALUE = 23
LAYER = 'maxfc7'
# N_MBATCH = 1000
N = 10000 # Chunk size
IMAGE_FILE = '/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/acantharia_protist/100224.jpg'
TRAIN_DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
VAL_DB = '/media/raid_arr/tmp/test0_norm_lmdb'
TRAIN_FEAT_OUT = '/media/raid_arr/data/ndsb/features/train_cnnv3_maxout_noaug.hdf5'
VAL_FEAT_OUT = '/media/raid_arr/data/ndsb/features/val_cnnv3_maxout_noaug.hdf5'
In [3]:
# Loading From Database
print 'Loading data...'
tic = time.time()
data = my_io.load_lmdb(VAL_DB)
print "Done in %.2f s." % (time.time() - tic)
val_files_all, images, labels = zip(*data)
val_labels = labels
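my_io is a project-local helper module that is not shown in this notebook. A minimal sketch of what its load_lmdb could look like, assuming each LMDB value is a serialized caffe Datum (the lmdb and caffe_pb2 imports above suggest as much); the function name and the (key, image, label) tuple layout are inferred from how the result is unpacked above, and error handling is elided:
In [ ]:
# Hypothetical sketch of my_io.load_lmdb (not the actual implementation).
def load_lmdb_sketch(db_path):
    data = []
    env = lmdb.open(db_path, readonly=True)
    with env.begin() as txn:
        for key, value in txn.cursor():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            # Datum stores pixels as raw bytes in (channels, height, width) order
            im = np.frombuffer(datum.data, dtype=np.uint8)
            im = im.reshape(datum.channels, datum.height, datum.width)
            data.append((key, im.transpose(1, 2, 0), datum.label))  # to (H, W, C)
    env.close()
    return data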
In [7]:
image_dims = images[0].shape[:2]
# image_dims = (57, 57)
print image_dims
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=np.array([MEAN_VALUE]),
                       raw_scale=1.0,  # 255 if loading via caffe.io, 1.0 for my_io lmdb images
                       image_dims=image_dims)
# caffe.set_phase_test()
caffe.set_mode_gpu()
n_feats = net.blobs[LAYER].data.shape[1]
In [32]:
# im = caffe.io.load_image(IMAGE_FILE, color=False)
print 'Layer Shapes:'
for k, v in net.blobs.items():
    print k, v.data.shape
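By default caffe.Classifier.predict oversamples each input into 10 crops (center plus four corners, each mirrored), which is why the feature extractor below averages net.blobs[LAYER].data over axis 0. A quick sanity cell (assuming the images loaded above):
In [ ]:
# After a single-image predict call the leading axis of every blob is the
# number of oversampled crops, so the feature blob should be (10, n_feats).
net.predict([images[0]])
print net.blobs[LAYER].data.shape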
In [36]:
def get_net_feats(db_data_in, db_feat_out, layer):
    # Create a fresh HDF5 file with resizable datasets; rows are appended
    # one chunk at a time so the full feature matrix never has to fit in memory.
    print 'Opening new db at:', db_feat_out
    f = h5py.File(db_feat_out, 'w')
    feat_db = f.create_dataset("feats", shape=(0, n_feats), maxshape=(None, n_feats), dtype='f')
    lbls_db = f.create_dataset("lbls", shape=(0,), maxshape=(None,), dtype='i8')
    impaths_db = f.create_dataset("im_paths", shape=(0,), maxshape=(None,), dtype='S120')
    # PREDICTION TIME
    print 'Predicting...', db_data_in
    next_key = ''
    first_run = True
    while next_key or first_run:
        print 'Starting at key: ', next_key
        read_start = time.time()
        data_chunk, next_key = my_io.load_lmdb_chunk(db_data_in, next_key, N)
        print "Read done in %.2f s." % (time.time() - read_start)
        chunk_len = len(data_chunk)
        print 'Chunk size:', chunk_len
        sys.stdout.flush()
        pred_start = time.time()
        print 'Propagating chunk through net...'
        sys.stdout.flush()
        im_paths = []
        feats = []
        lbls = []
        for im_path, im, lbl in data_chunk:
            # Forward pass; the softmax output is discarded, we only want
            # the activations of the requested intermediate layer.
            net.predict([im])
            # Average over the oversampled crops (axis 0 of the blob)
            feat = np.squeeze(net.blobs[layer].data.mean(0))
            feats.append(feat)
            lbls.append(lbl)
            im_paths.append(im_path)
        # Grow each dataset by exactly one chunk, then fill the new rows.
        # Starting from zero rows also handles a final (or only) chunk
        # that is shorter than N.
        feat_db.resize(feat_db.shape[0] + chunk_len, axis=0)
        lbls_db.resize(lbls_db.shape[0] + chunk_len, axis=0)
        impaths_db.resize(impaths_db.shape[0] + chunk_len, axis=0)
        feat_db[-chunk_len:] = np.array(feats)
        lbls_db[-chunk_len:] = np.array(lbls)
        impaths_db[-chunk_len:] = np.array(im_paths)
        print "Pred done in %.2f s." % (time.time() - pred_start)
        sys.stdout.flush()
        first_run = False
    f.close()
    print "Done predicting"
    print 'DB saved in:', db_feat_out
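The loop above depends on a second my_io helper, load_lmdb_chunk, which reads at most N records starting at a given key and returns the key to resume from; an empty next key is what terminates the while loop. A minimal sketch under the same serialized-Datum assumption as before (names and error handling are illustrative only):
In [ ]:
# Hypothetical sketch of my_io.load_lmdb_chunk (not the actual implementation).
def load_lmdb_chunk_sketch(db_path, start_key, n):
    data = []
    next_key = ''  # stays empty if we exhaust the database
    env = lmdb.open(db_path, readonly=True)
    with env.begin() as txn:
        cursor = txn.cursor()
        if start_key:
            cursor.set_key(start_key)  # resume where the previous chunk stopped
        else:
            cursor.first()
        for _ in xrange(n):
            key, value = cursor.item()
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            im = np.frombuffer(datum.data, dtype=np.uint8)
            im = im.reshape(datum.channels, datum.height, datum.width)
            data.append((key, im.transpose(1, 2, 0), datum.label))
            if not cursor.next():
                break  # end of database: leave next_key empty
        else:
            next_key = cursor.key()  # first key of the next chunk
    env.close()
    return data, next_key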
In [37]:
start = time.time()
# Extract and save features for the validation split
get_net_feats(db_data_in=VAL_DB,
              db_feat_out=VAL_FEAT_OUT,
              layer=LAYER)
print "Done in %.2f s." % (time.time() - start)
In [ ]:
start = time.time()
# Extract and save features for the training split
get_net_feats(db_data_in=TRAIN_DB,
              db_feat_out=TRAIN_FEAT_OUT,
              layer=LAYER)
print "Done in %.2f s." % (time.time() - start)
In [38]:
# Reload the saved validation features and labels from HDF5
f = h5py.File(VAL_FEAT_OUT, 'r')
feats_arr = f['feats'][:]
y = f['lbls'][:]
In [66]:
from sklearn import svm
from sklearn import cross_validation
clf = svm.SVC()
scores = cross_validation.cross_val_score(clf, feats_arr, y, cv=5)
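cross_val_score returns one accuracy per fold, so a one-line summary makes the result easier to read:
In [ ]:
# Mean and spread of the 5 per-fold accuracies
print 'SVM 5-fold CV accuracy: %.4f +/- %.4f' % (scores.mean(), scores.std())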
In [209]:
# Sanity checks on the saved feature file and the reloaded matrix
f.keys()
In [21]:
feats_arr
In [30]:
feats_arr.shape