In [10]:
# Augmented training data
# Just augment the folds
import numpy as np
import os
# Expand a fold list into its augmented counterpart: every (image path, label)
# row read from ./data/<list>.txt becomes one row per rotated copy, written to
# ./data/augmented/<list>.txt as "<absolute augmented path>\t<label>".

# Loop-invariant settings, defined once instead of on every iteration.
aug_train_path = '../../data/ndsb/augment/train'
# Rotated copies carry a '_rot<angle>' suffix, angles 0..315 in 45-degree steps.
angs = range(0, 360, 45)
suffixes = np.array(['_rot' + str(ang) for ang in angs])

# To process the per-fold lists as well: for mode in ['test', 'train']:
for mode in ['train']:
    # Per-fold variant: for n in range(5):
    for n in ['_all']:
        # e.g. text_file = 'test0.txt'
        text_file = mode + str(n) + '.txt'
        print('Converting: ' + text_file)
        f_path = os.path.join('./data', text_file)
        save_path = os.path.join('./data/augmented', text_file)
        # Unlabeled variant:
        # save_path = os.path.join('./data/augmented', 'unlabeled' + text_file)

        # ndmin=2 keeps a one-row list two-dimensional; without it loadtxt
        # returns a 1-D array and the (entry, label) unpacking below fails.
        arr = np.loadtxt(f_path, dtype=str, delimiter='\t', ndmin=2)

        # A vectorised (meshgrid-style) construction would also work; the
        # explicit loops are kept for readability.
        with open(save_path, 'wb') as f_save:
            for ii, (entry, label) in enumerate(arr):
                d, b = os.path.split(entry)
                # The image's parent folder name is reused as the sub-folder
                # under the augmented root.
                l_str = os.path.basename(d)
                b = os.path.splitext(b)[0]
                for suf in suffixes:
                    new_path = os.path.abspath(
                        os.path.join(aug_train_path, l_str, b + suf + '.jpg'))
                    # Unlabeled variant: new_path + '\t' + '-1' + '\n'
                    f_save.write(new_path + '\t' + label + '\n')
                # Progress report every 1000 source rows.
                if ii % 1000 == 0:
                    print('%d written' % ii)
In [39]:
# Scratch check: shows the last path component of `d`.
# NOTE(review): `d` is only assigned inside the augmentation loop, so this
# relies on leftover kernel state — confirm it is still needed.
os.path.basename(d)
Out[39]:
In [2]:
import numpy as np
import os
import glob
# Build the test listing: every augmented test image followed by every
# original test image, each paired with the placeholder label -1, saved as a
# tab-separated text file.
aug_test_path = '/data/ndsb/augment/test'
orig_test_path = '/data/ndsb/test'
save_path = '/data/ndsb/augment/test_aug.txt'

# Augmented images sit one directory level below the augment root; the
# originals are matched directly inside the test folder.
aug_pattern = os.path.join(aug_test_path, '*', '*.jpg')
orig_pattern = os.path.join(orig_test_path, '*.jpg')
files_aug = list(map(os.path.abspath, glob.glob(aug_pattern)))
files_orig = list(map(os.path.abspath, glob.glob(orig_pattern)))

files_all = np.r_[files_aug, files_orig]
# One -1 label per listed file.
y = -np.ones(len(files_all), dtype=int)

rows = np.c_[files_all, y]
np.savetxt(save_path, rows, fmt='%s', delimiter='\t')
print('%s saved' % save_path)
In [7]:
# Sanity check: sizes of the augmented, original and combined file lists,
# printed one per line in that order.
for file_list in (files_aug, files_orig, files_all):
    print(len(file_list))
In [39]:
#######################################
import numpy as np
import os
import glob
from sklearn.cross_validation import StratifiedKFold
from tools.le import le
# Dataset roots: the training images are globbed one directory level below
# TRAIN_PATH; TRAIN_AUG_PATH is the root used when building augmented paths.
TRAIN_PATH = '/media/raid_arr/data/ndsb/train/'
TRAIN_AUG_PATH = '/media/raid_arr/data/ndsb/augment/train/'

# Entries of the training root (presumably the class folder names — TODO confirm).
labels = os.listdir(TRAIN_PATH)

# glob returns matches in arbitrary, filesystem-dependent order. Sort the
# paths so any split derived from this array (even one with a fixed random
# seed) is reproducible across runs and machines.
files_all = np.array(sorted(
    os.path.abspath(f)
    for f in glob.glob(os.path.join(TRAIN_PATH, '*', '*.jpg'))))

# The label string of each image is the name of its parent directory.
y_str = [os.path.basename(os.path.dirname(f)) for f in files_all]
# `le` comes from tools.le; presumably an already-fitted label encoder that
# maps label strings to integer ids — verify against that module.
y = le.transform(y_str)
In [51]:
# Write the fold listings: the complete training list, then for each of the k
# stratified folds a train list, a test list and an augmented train list.
SAVE_PATH = '/media/raid_arr/data/ndsb/folds'
k = 5
skf = StratifiedKFold(y, n_folds=k, shuffle=True, random_state=0)

# Rotation suffixes '_rot0', '_rot45', ..., '_rot315'.
angs = range(0, 360, 45)
suffixes = np.array(['_rot' + str(ang) for ang in angs])


def save_fn(name, f_mode, y_mode):
    """Save (file, label) pairs as tab-separated rows in SAVE_PATH/<name>.txt."""
    out_file = os.path.join(SAVE_PATH, str(name) + '.txt')
    rows = np.c_[f_mode, y_mode]
    return np.savetxt(out_file, rows, fmt='%s', delimiter='\t')


def aug_name(f_i, suff):
    """Return the augmented-image path for f_i with suff placed before the extension."""
    class_dir = os.path.basename(os.path.dirname(f_i))
    stem, ext = os.path.splitext(os.path.basename(f_i))
    return os.path.join(TRAIN_AUG_PATH, class_dir, stem + suff + ext)


save_fn('train_all', files_all, y)

# Iterating the splitter yields one (train indices, test indices) pair per fold.
for fold_ii, (train_ind, test_ind) in enumerate(skf):
    f_train, y_train = files_all[train_ind], y[train_ind]
    f_test, y_test = files_all[test_ind], y[test_ind]
    save_fn('train' + str(fold_ii), f_train, y_train)
    save_fn('test' + str(fold_ii), f_test, y_test)

    # Aug: each training image expands to one entry per suffix, file-major,
    # and its label is repeated to stay aligned with those entries.
    f_train_aug = np.array([aug_name(f_i, suff) for f_i in f_train for suff in suffixes])
    y_train_aug = np.repeat(y_train, len(suffixes))
    save_fn('train_aug' + str(fold_ii), f_train_aug, y_train_aug)
In [50]:
# Scratch check: shows the name of the directory that contains `f_i`.
# NOTE(review): relies on `f_i` lingering in the kernel namespace (e.g. leaked
# from a Python 2 list comprehension) — confirm it is still needed.
os.path.basename(os.path.dirname(f_i))
Out[50]: