In [1]:
import numpy as np
import pickle
# Read the cached HEX data; only its 'H' entry is used in this cell.
with open('cache/hex.pickle', mode='rb') as hex_file:
    hex_data = pickle.load(hex_file)
H_trans = hex_data['H'].T

# For each of the first 20 rows of H_trans, record the position of its first
# non-zero entry (presumably the parent id of each leaf class -- TODO confirm).
id_parent = []
for i in range(20):
    first_nonzero = np.nonzero(H_trans[i])[0][0]
    id_parent.append(first_nonzero)

# The validation and test frames are stored together in one pickle and
# unpacked into two separate objects.
with open('cache/df_val_test.pickle', mode='rb') as split_file:
    df_val, df_test = pickle.load(split_file)

# Maps a split name to its data frame; later cells add the training splits.
filename_df = dict(val=df_val, test=df_test)
In [2]:
from os import listdir
import re
# Register every cached training split found in 'cache/' (file names starting
# with 'df_train') under the key 'train.<first run of digits in the file name>'.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only run this against cache files you produced yourself.
for fn in filter(lambda x: x.startswith('df_train'), listdir('cache')):
    with open('cache/' + fn, mode='rb') as h:
        df_train = pickle.load(h)
    # Raw string: '\d' inside a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python versions. The pattern itself is unchanged.
    # A matching file name without any digit raises IndexError here.
    filename_df['train.' + re.findall(r'\d+', fn)[0]] = df_train
In [3]:
import cv2
from os.path import join
# Directory that image file names passed to load_image are resolved against.
img_dir = '../pascal12/JPEGImages'
# Mean image stored as 3*256*256; cast to float32 and move the channel axis
# last in a single transpose so it ends up as XY[BGR].
img_mean = np.load('caffe/ilsvrc12_mean.npy').astype(np.float32).transpose(1, 2, 0)
def load_image(filename):
    """
    Loads image, resizes to 256*256, subtracts mean, resizes to 227*227.
    No axis rotation or normalization.

    Args:
        filename: Image file name, resolved relative to `img_dir`.
    Returns:
        Result image as an np.float32 array with size 227*227*3. Because the mean
        image has been subtracted, values are NOT confined to [0, 255] and may be negative.
    Raises:
        IOError: If the image file cannot be read.
    """
    path = join(img_dir, filename)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None rather
        # than raising; fail here with the offending path instead of letting
        # cv2.resize die with an opaque error.
        raise IOError('Cannot read image: {}'.format(path))
    img = cv2.resize(img, (256, 256))  # match the spatial size of img_mean
    return cv2.resize(img.astype(np.float32) - img_mean, (227, 227))
In [4]:
import h5py
def write_h5(filename, X, Y_leaf, Y_parent=None, Y_hierarchy=None):
    """
    Stores the given arrays as datasets in the HDF5 file '../<filename>.h5'.

    The file is opened in mode 'w'. Datasets 'X' and 'Y_leaf' are always
    created; 'Y_parent' and 'Y_hierarchy' are created only when the
    corresponding argument is not None.
    """
    always_written = ('X', 'Y_leaf')
    named_arrays = (
        ('X', X),
        ('Y_leaf', Y_leaf),
        ('Y_parent', Y_parent),
        ('Y_hierarchy', Y_hierarchy),
    )
    with h5py.File('../{}.h5'.format(filename), mode='w') as h:
        for key, values in named_arrays:
            if key in always_written or values is not None:
                h.create_dataset(key, data=values)
In [5]:
# Default number of columns of the indicator matrix built by to_sparse
# (presumably the total number of labels in the hierarchy -- TODO confirm).
D = 27


def to_sparse(dense, num_classes=None):
    """
    Expands per-row index collections into a boolean indicator matrix.

    Despite the name, the result is a regular 2-D numpy array in which the
    listed positions of every row are set to True.

    Args:
        dense: Sized iterable of length N. Element i holds the column index (or
            indices) to set in row i. Elements are consumed in iteration order,
            so a pandas Series is handled positionally regardless of its index
            labels.
        num_classes: Number of columns of the result. Defaults to the
            module-level D.
    Returns:
        Bool array of shape (N, num_classes).
    """
    if num_classes is None:
        num_classes = D
    N = len(dense)
    sparse = np.zeros((N, num_classes), dtype=bool)
    # enumerate() instead of dense[i]: integer subscripting of a pandas Series
    # is label-based and breaks (KeyError or misaligned rows) when the index is
    # not 0..N-1.
    for row, indices in enumerate(dense):
        sparse[row, indices] = True
    return sparse
In [6]:
# Write one HDF5 file per split. Training splits additionally get the
# 'Y_parent' and 'Y_hierarchy' targets derived from their pseudo labels.
for fn, df in filename_df.items():
    # Build the batch from an explicit list: map() returns a lazy iterator on
    # Python 3, which numpy cannot turn into an image array.
    X = np.array([load_image(name) for name in df['img']])
    # Move the channel axis in front of the two spatial axes:
    # convert to [BGR]XY for Caffe.
    X = np.transpose(X, (0, 3, 1, 2))
    Y_leaf = df['label'].astype(float)
    if fn.startswith('train'):
        # Keep the leaf label when it is part of the row's pseudo label,
        # otherwise fall back to the id_parent entry for that label.
        Y_parent = np.array(
            [label if label in pseudo else id_parent[label]
             for (label, pseudo) in zip(df['label'], df['pseudo_label'])],
            dtype=float)
        Y_hierarchy = to_sparse(df['pseudo_label']).astype(float)
        write_h5(fn, X, Y_leaf, Y_parent, Y_hierarchy)
    else:
        write_h5(fn, X, Y_leaf)