Use the extract_test_feats notebook if you need to redo the
full DB extraction on the train or test set.
In [1]:
    
import plyvel
import os
import numpy as np
# Paths: the full source DB to split, the two output fold DBs, and the fold
# listing files (tab-separated, image path in the first column) that define
# which images go into each fold.
TRAIN_FULL_PATH = '/media/raid_arr/tmp/normed_lvl'    # DB to split
TRAIN_FOLD_PATH = '/media/raid_arr/tmp/train0_norm_lvl'    # Output train
TEST_FOLD_PATH = '/media/raid_arr/tmp/test0_norm_lvl'    # Output test
TRAIN_FOLD_TXT = '/media/raid_arr/data/ndsb/folds/train0.txt'
TEST_FOLD_TXT = '/media/raid_arr/data/ndsb/folds/test0.txt'
    
In [2]:
    
# Grab the image paths from the folds that were already generated.
def _fold_paths(listing_txt):
    """Return the first (path) column of a tab-separated fold listing."""
    return np.loadtxt(listing_txt, delimiter='\t', dtype=str)[:, 0]

train_fold_paths = _fold_paths(TRAIN_FOLD_TXT)
test_fold_paths = _fold_paths(TEST_FOLD_TXT)
    
In [10]:
    
# Reduce each full path to its bare file name (the DB key), then shuffle
# both name arrays in place so insertion order is randomized.
train_fold_names = np.array([os.path.basename(full_path)
                             for full_path in train_fold_paths])
test_fold_names = np.array([os.path.basename(full_path)
                            for full_path in test_fold_paths])
for name_arr in (train_fold_names, test_fold_names):
    np.random.shuffle(name_arr)
    
In [77]:
    
# Open the source DB plus the two output fold DBs and copy each image's
# record across, keyed by image name.
# BUG FIX: the original created write batches but then called put() directly
# on the DBs, so the wb_*.write() calls below flushed empty batches (the
# batching was a no-op).  The puts now go through the batches as intended.
db_train_full = plyvel.DB(TRAIN_FULL_PATH)
db_train_fold = plyvel.DB(TRAIN_FOLD_PATH, create_if_missing=True)
wb_train_fold = db_train_fold.write_batch()
db_test_fold = plyvel.DB(TEST_FOLD_PATH, create_if_missing=True)
wb_test_fold = db_test_fold.write_batch()
# Insert in shuffled order so sequential reads see a randomized sample order.
for im_name in train_fold_names:
    wb_train_fold.put(str(im_name), db_train_full.get(str(im_name)))
for im_name in test_fold_names:
    wb_test_fold.put(str(im_name), db_train_full.get(str(im_name)))

# Flush the batched writes, then release all handles.
wb_train_fold.write()
wb_test_fold.write()
db_train_full.close()
db_train_fold.close()
db_test_fold.close()
    
In [76]:
    
# Redundant cleanup cell: re-closes the handles, e.g. if the fill cell was
# interrupted before reaching its own close() calls.
# NOTE(review): presumably double-close is harmless in the installed plyvel
# version -- confirm before relying on re-running this after the cell above.
db_train_full.close()
db_train_fold.close()
db_test_fold.close()
    
In [78]:
    
    
In [ ]:
    
    
In [24]:
    
from random import shuffle
import my_io
reload(my_io)  # pick up any edits made to my_io since the last import
# In-place shuffle of the (1-D) path arrays so extraction order is random.
shuffle(train_fold_paths)
shuffle(test_fold_paths)
    
In [25]:
    
TRAIN_FOLD_PATH = '/media/raid_arr/tmp/train0_norm_lmdb'    # Output train
TEST_FOLD_PATH = '/media/raid_arr/tmp/test0_norm_lmdb'    # Output test
# NOTE(review): multi_extract and single_extract are both run over the same
# train paths into the same output DB -- one is presumably a leftover from
# comparing the two extractors; confirm which call is intended before rerun.
my_io.multi_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='lmdb',
                    perturb=False, verbose=True)
my_io.single_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='lmdb',
                    perturb=False, verbose=True)
my_io.single_extract(test_fold_paths, TEST_FOLD_PATH, backend='lmdb',
                    perturb=False, verbose=True)
    
    
    
In [ ]:
    
# Create aug: build the augmented (perturbed) training DB in leveldb format.
TRAIN_FOLD_PATH = '/media/raid_arr/tmp/train0_normaug_lvl'    # Output train
my_io.multi_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='leveldb',
                    perturb=True, verbose=True)
# NOTE(review): this second call writes unperturbed data with backend='lmdb'
# into a path named *_lvl -- looks like a stale copy-paste from the cell
# above; confirm intent before re-running.
my_io.single_extract(train_fold_paths, TRAIN_FOLD_PATH, backend='lmdb',
                    perturb=False, verbose=True)
    
In [4]:
    
# Extract the (unperturbed) test fold using my_io's default backend.
my_io.single_extract(test_fold_paths, TEST_FOLD_PATH, perturb=False, verbose=True)
    
    
    
In [1]:
    
from caffe.proto import caffe_pb2
from time import time
import lmdb
import numpy as np
    
In [80]:
    
# Select which fold to process: uncomment exactly one (DB, DB_FEATS) pair.
# DB is the core datum DB to read; DB_FEATS is the features-only DB to write.
# DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/train0_norm_feats_lmdb'
DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
DB_FEATS = '/media/raid_arr/tmp/test0_norm_feats_lmdb'
# DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/train0_norm_feats_simp_lmdb'
# DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/test0_norm_feats_simp_lmdb'
def make_feats_db(core_db=DB, feats_db=DB_FEATS, verbose=False):
    db = lmdb.open(core_db)
    db_feats = lmdb.open(feats_db, map_size=1e12)
    txn = db.begin()
    c = txn.cursor()
    txn_feats = db_feats.begin(write=True)
    std_scale = 2.
    tic = time()
    for k, v in c:
        datum = caffe_pb2.Datum()
        datum.ParseFromString(v)
        extra_feats = np.array([
            datum.orig_space,
            datum.orig_height,
            datum.orig_width,
            datum.extent,
            datum.hu1,
            datum.hu2,
            datum.hu3,
            datum.hu4,
            datum.hu5,
            datum.hu6,
            datum.hu7,
            datum.solidity,
        ])[None, None, :]
        datum.channels, datum.height, datum.width = extra_feats.shape
        scale_map = ((extra_feats + std_scale) * 128./std_scale).clip(0, 255).astype('uint8')  # 2 std
        datum.data = scale_map.tobytes()
    #     datum.float_data.extend(extra_feats.flat)
        v_feats = datum.SerializeToString()
        txn_feats.put(k, v_feats)
    txn_feats.commit()
    db.close()
    db_feats.close()
    
    if verbose:
        print 'Feat transfer done:', time() - tic
    
In [79]:
    
# Run the feature transfer for the DB paths configured in the cell above.
make_feats_db(verbose=True)
    
    
In [10]:
    
import my_io
reload(my_io)  # pick up any edits made to my_io since the last import
# Transfer the scalar features of every datum in DB into a parallel
# features-only DB with the same keys (my_io version of the cell above).
# Swap the commented paths to process the test fold instead.
DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
DB_FEATS = '/media/raid_arr/tmp/train0_norm_feats_lmdb'
# DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
# DB_FEATS = '/media/raid_arr/tmp/test0_norm_feats_lmdb'
my_io.transfer_feats_db(core_db=DB, 
                        feats_db=DB_FEATS,
                        backend='lmdb',
                        verbose=True)
    
    
In [11]:
    
import my_io
reload(my_io)  # pick up any edits made to my_io since the last import
# Build a parent-labels DB aligned (by key) with the core DB.
# Swap the commented paths to process the test fold instead.
DB = '/media/raid_arr/tmp/train0_norm_lmdb/'
DB_LBLS = '/media/raid_arr/tmp/train0_norm_lbls_lmdb'
# DB = '/media/raid_arr/tmp/test0_norm_lmdb/'
# DB_LBLS = '/media/raid_arr/tmp/test0_norm_lbls_lmdb'
# NOTE(review): the keyword is named feats_db even though it receives the
# labels path -- presumably just my_io's generic output-DB parameter; confirm.
my_io.transfer_parentlbls_db(core_db=DB, 
                        feats_db=DB_LBLS,
                        backend='lmdb',
                        verbose=True)