Use the extract_test_feats notebook if you need to redo the full DB extraction on train or test.
In [1]:
import plyvel
import os
import numpy as np
TRAIN_FULL_PATH = '/media/raid_arr/tmp/normed_lvl' # DB to split
TRAIN_FOLD_PATH = '/media/raid_arr/tmp/train0_norm_lvl' # Output train
TEST_FOLD_PATH = '/media/raid_arr/tmp/test0_norm_lvl' # Output test
TRAIN_FOLD_TXT = '/media/raid_arr/data/ndsb/folds/train0.txt'
TEST_FOLD_TXT = '/media/raid_arr/data/ndsb/folds/test0.txt'
In [2]:
# Grab the image paths from the folds that were already generated
train_fold_paths = np.loadtxt(TRAIN_FOLD_TXT, delimiter='\t', dtype=str)[:, 0]
test_fold_paths = np.loadtxt(TEST_FOLD_TXT, delimiter='\t', dtype=str)[:, 0]
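A quick sanity check on the loaded fold lists can be run here (a minimal sketch; it assumes the two folds are meant to be disjoint):
In [ ]:
# Sketch: fold sizes and overlap between the folds (expect zero overlap)
print len(train_fold_paths), len(test_fold_paths)
print len(np.intersect1d(train_fold_paths, test_fold_paths))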
In [10]:
# Get image name from path and shuffle order
train_fold_names = np.array([os.path.basename(p) for p in train_fold_paths])
test_fold_names = np.array([os.path.basename(p) for p in test_fold_paths])
np.random.shuffle(train_fold_names)
np.random.shuffle(test_fold_names)
In [77]:
# Open the new DBs and fill them via write batches
db_train_full = plyvel.DB(TRAIN_FULL_PATH)
db_train_fold = plyvel.DB(TRAIN_FOLD_PATH, create_if_missing=True)
wb_train_fold = db_train_fold.write_batch()
db_test_fold = plyvel.DB(TEST_FOLD_PATH, create_if_missing=True)
wb_test_fold = db_test_fold.write_batch()
# Insert in shuffled order; puts go through the write batches opened above
for im_name in train_fold_names:
    wb_train_fold.put(str(im_name), db_train_full.get(str(im_name)))
for im_name in test_fold_names:
    wb_test_fold.put(str(im_name), db_train_full.get(str(im_name)))
wb_train_fold.write()
wb_test_fold.write()
db_train_full.close()
db_train_fold.close()
db_test_fold.close()
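To confirm the split, the entry counts in the new fold DBs can be compared against the fold name lists (a sketch, re-opening the DBs after the writes above):
In [ ]:
# Sketch: entry counts in the new fold DBs should match the fold name lists
db_train_fold = plyvel.DB(TRAIN_FOLD_PATH)
db_test_fold = plyvel.DB(TEST_FOLD_PATH)
print sum(1 for _ in db_train_fold.iterator()), len(train_fold_names)
print sum(1 for _ in db_test_fold.iterator()), len(test_fold_names)
db_train_fold.close()
db_test_fold.close()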
In [24]:
from random import shuffle
import my_io
reload(my_io)
shuffle(train_fold_paths)
shuffle(test_fold_paths)
In [25]:
TRAIN_FOLD_PATH = '/media/raid_arr/tmp/train0_norm_lmdb' # Output train
TEST_FOLD_PATH = '/media/raid_arr/tmp/test0_norm_lmdb' # Output test
my_io.multi_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='lmdb',
                    perturb=False, verbose=True)
my_io.single_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='lmdb',
                     perturb=False, verbose=True)
my_io.single_extract(test_fold_paths, TEST_FOLD_PATH, backend='lmdb',
                     perturb=False, verbose=True)
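Assuming my_io writes one LMDB record per input image, the entry counts reported by lmdb can be checked against the fold path lists (a minimal sketch):
In [ ]:
import lmdb
# Sketch: compare LMDB entry counts against the fold path lists
for db_path, paths in [(TRAIN_FOLD_PATH, train_fold_paths),
                       (TEST_FOLD_PATH, test_fold_paths)]:
    env = lmdb.open(db_path, readonly=True)
    print db_path, env.stat()['entries'], len(paths)
    env.close()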
In [ ]:
# Create augmented train DB
TRAIN_FOLD_PATH = '/media/raid_arr/tmp/train0_normaug_lvl'  # Output train
my_io.multi_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='leveldb',
                    perturb=True, verbose=True)
my_io.single_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='lmdb',
                     perturb=False, verbose=True)
In [4]:
my_io.single_extract(test_fold_paths, TEST_FOLD_PATH, perturb=False, verbose=True)
In [1]:
from caffe.proto import caffe_pb2
from time import time
import lmdb
import numpy as np
In [80]:
# DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/train0_norm_feats_lmdb'
DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
DB_FEATS = '/media/raid_arr/tmp/test0_norm_feats_lmdb'
# DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/train0_norm_feats_simp_lmdb'
# DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/test0_norm_feats_simp_lmdb'
def make_feats_db(core_db=DB, feats_db=DB_FEATS, verbose=False):
    db = lmdb.open(core_db)
    db_feats = lmdb.open(feats_db, map_size=1e12)
    txn = db.begin()
    c = txn.cursor()
    txn_feats = db_feats.begin(write=True)
    std_scale = 2.
    tic = time()
    for k, v in c:
        datum = caffe_pb2.Datum()
        datum.ParseFromString(v)
        # Collect the extra scalar features into a 1x1x12 array
        extra_feats = np.array([
            datum.orig_space,
            datum.orig_height,
            datum.orig_width,
            datum.extent,
            datum.hu1,
            datum.hu2,
            datum.hu3,
            datum.hu4,
            datum.hu5,
            datum.hu6,
            datum.hu7,
            datum.solidity,
        ])[None, None, :]
        datum.channels, datum.height, datum.width = extra_feats.shape
        # Map [-std_scale, std_scale] (i.e. 2 std) onto [0, 255] and quantize to uint8
        scale_map = ((extra_feats + std_scale) * 128. / std_scale).clip(0, 255).astype('uint8')
        datum.data = scale_map.tobytes()
        # datum.float_data.extend(extra_feats.flat)
        v_feats = datum.SerializeToString()
        txn_feats.put(k, v_feats)
    txn_feats.commit()
    db.close()
    db_feats.close()
    if verbose:
        print 'Feat transfer done:', time() - tic
In [79]:
make_feats_db(verbose=True)
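To spot-check the result, one record can be read back and the uint8 scaling inverted; with the mapping above, the approximate original values are data / 128. * std_scale - std_scale (a sketch, assuming the same patched caffe_pb2.Datum used above):
In [ ]:
# Sketch: read one record back from the feats DB and invert the uint8 scaling
std_scale = 2.
env = lmdb.open(DB_FEATS, readonly=True)
with env.begin() as txn:
    k, v = next(txn.cursor().iternext())
    datum = caffe_pb2.Datum()
    datum.ParseFromString(v)
    raw = np.frombuffer(datum.data, dtype=np.uint8)
    print k
    print raw / 128. * std_scale - std_scale  # approximate original feature values
env.close()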
In [10]:
import my_io
reload(my_io)
DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
DB_FEATS = '/media/raid_arr/tmp/train0_norm_feats_lmdb'
# DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/test0_norm_feats_lmdb'
my_io.transfer_feats_db(core_db=DB,
                        feats_db=DB_FEATS,
                        backend='lmdb',
                        verbose=True)
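A key-level check can follow (a sketch, assuming transfer_feats_db keeps the same keys as the core DB, as make_feats_db above does):
In [ ]:
import lmdb
# Sketch: keys in the feats DB should mirror the keys in the core DB
env_core = lmdb.open(DB, readonly=True)
env_feats = lmdb.open(DB_FEATS, readonly=True)
with env_core.begin() as txn_c, env_feats.begin() as txn_f:
    keys_core = set(k for k, _ in txn_c.cursor())
    keys_feats = set(k for k, _ in txn_f.cursor())
print len(keys_core), len(keys_feats), len(keys_core ^ keys_feats)  # symmetric diff should be 0
env_core.close()
env_feats.close()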
In [11]:
import my_io
reload(my_io)
DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
DB_LBLS = '/media/raid_arr/tmp/train0_norm_lbls_lmdb'
# DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
# DB_LBLS = '/media/raid_arr/tmp/test0_norm_lbls_lmdb'
my_io.transfer_parentlbls_db(core_db=DB,
                             feats_db=DB_LBLS,
                             backend='lmdb',
                             verbose=True)