Prepares data for Caffe: constructs stratified k=5 folds and shoves the data into LMDB.
In [13]:
import os
import glob
import numpy as np
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import pickle
# Selects which preparation step the cells below execute.
# Values tested further down: 'train_folds', 'test_final', 'aug_train', 'aug_test'.
mode = 'aug_train'
# mode = 'test_final'
In [13]:
# Grab all training image paths and their labels.
# An image's label is the name of the directory that contains it; the labels
# are integer-encoded with a LabelEncoder that is then pickled for reuse.
if mode == 'train_folds':
    path_data = '../../data/ndsb/train'

    # Only sub-directories are classes. A stray regular file in the train
    # folder (README, .DS_Store, ...) would otherwise be fitted as an extra
    # class of the encoder.
    labels = [entry for entry in os.listdir(path_data)
              if os.path.isdir(os.path.join(path_data, entry))]
    le = LabelEncoder().fit(labels)

    files_all = np.array([
        os.path.abspath(f)
        for f in glob.glob(os.path.join(path_data, '*', '*.jpg'))
    ])
    # Parent directory name of each image == its class name.
    y_str = [os.path.basename(os.path.dirname(f)) for f in files_all]
    y_enc = le.transform(y_str)

    # Persist the fitted encoder. The context manager makes sure the pickle
    # is flushed and the file handle closed (the bare open() leaked it).
    with open('./tools/le.p', 'wb') as le_file:
        pickle.dump(le, le_file)
In [19]:
# Create k=5 pairs of .txt files that specify the train & test images of each
# stratified fold, plus 'train_all.txt' listing every image.
# Relies on `path_data`, `files_all` and `y_enc` from the previous cell.
if mode == 'train_folds':
    k = 5

    # `path_save` is not assigned anywhere on the 'train_folds' path, so on a
    # fresh kernel the writes below failed with NameError. Keep a value that
    # is already defined; otherwise default to the parent of the data folder,
    # which is the layout the augmentation cell uses.
    # NOTE(review): confirm this is the intended output directory.
    if 'path_save' not in globals():
        path_save = os.path.dirname(path_data)

    def save_fn(name, f_mode, y_mode):
        """Write `<name>.txt` into `path_save`: one "path<TAB>label" row per image."""
        np.savetxt(
            os.path.join(path_save, str(name) + '.txt'),
            np.c_[f_mode, y_mode],
            fmt='%s', delimiter='\t')

    skf = StratifiedKFold(y_enc, n_folds=k)

    save_fn('train_all', files_all, y_enc)
    for fold_ii, (train_ind, test_ind) in enumerate(skf):
        f_train, f_test = files_all[train_ind], files_all[test_ind]
        y_train, y_test = y_enc[train_ind], y_enc[test_ind]
        save_fn('train' + str(fold_ii), f_train, y_train)
        save_fn('test' + str(fold_ii), f_test, y_test)
In [7]:
# Write the list file for the final (unlabelled) test images.
if mode == 'test_final':
    save_name = 'test-1.txt'
    # Alternative location used previously: '../../data/ndsb/test'
    path_data = '/data/ndsb/test'

    # `path_save` is not assigned anywhere on the 'test_final' path, so on a
    # fresh kernel the write below failed with NameError. Keep a value that
    # is already defined; otherwise default to the parent of the data folder,
    # which is the layout the augmentation cell uses.
    # NOTE(review): confirm this is the intended output directory.
    if 'path_save' not in globals():
        path_save = os.path.dirname(path_data)

    files_all = np.array([
        os.path.abspath(f)
        for f in glob.glob(os.path.join(path_data, '*.jpg'))
    ])
    # Test images have no ground truth: every row gets the placeholder label -1.
    save_arr = np.c_[files_all, -np.ones(len(files_all)).astype(int)]
    np.savetxt(os.path.join(path_save, save_name), save_arr, fmt='%s')
In [17]:
# Write the list file for the augmented train or test images.
# NOTE(review): `tools.le` is a project module not visible here; presumably it
# exposes the fitted LabelEncoder that an earlier cell pickles to
# ./tools/le.p -- confirm.
from tools.le import le

if 'aug' in mode:
    path_save = '/data/ndsb/augment'

    if mode == 'aug_train':
        save_name = 'train_aug.txt'
        path_data = '/data/ndsb/augment/train'
    elif mode == 'aug_test':
        save_name = 'test_aug.txt'
        path_data = '/data/ndsb/augment/test'
    else:
        # Previously any other '...aug...' value fell through and crashed
        # later with a NameError on `save_name`; fail with a clear message.
        raise ValueError('unrecognised augmentation mode: %r' % (mode,))

    files_all = np.array([
        os.path.abspath(f)
        for f in glob.glob(os.path.join(path_data, '*', '*.jpg'))
    ])

    if mode == 'aug_train':
        # The parent directory name of each image is its class name.
        lbl_str = [os.path.basename(os.path.dirname(f)) for f in files_all]
        lbl_n = le.transform(lbl_str)
    else:
        # 'aug_test': no ground truth, use the placeholder label -1.
        lbl_n = -np.ones(len(files_all)).astype(int)

    save_arr = np.c_[files_all, lbl_n]
    out_file = os.path.join(path_save, save_name)
    np.savetxt(out_file, save_arr, fmt='%s')
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(out_file)
In [16]:
# Sanity check: display the integer code the encoder assigns to one class name.
# NOTE(review): a bare string is passed here; scikit-learn documents
# `transform` as taking an array-like of labels -- confirm the installed
# version accepts a scalar.
from tools.le import le

sample_class = 'copepod_calanoid_eggs'
le.transform(sample_class)
Out[16]: