In [1]:
import numpy as np
import pickle
# Load the cached hierarchy data. NOTE(review): pickle.load executes arbitrary code from the
# file, so 'cache/' must only ever contain trusted files.
with open('cache/hex.pickle', mode='rb') as hex_file:
    hex_data = pickle.load(hex_file)
H_trans = hex_data['H'].T
# id_parent[i] is the index of the first nonzero entry in row i of the transposed 'H' matrix,
# for i in 0..19 (presumably the 20 leaf classes -- TODO confirm).
id_parent = []
for leaf in range(20):
    nonzero_idx = np.nonzero(H_trans[leaf])[0]
    id_parent.append(nonzero_idx[0])
# Validation and test data frames are stored together as a single pickled pair.
with open('cache/df_val_test.pickle', mode='rb') as val_test_file:
    df_val, df_test = pickle.load(val_test_file)
# Maps an output file name (without extension) to the data frame it is built from.
filename_df = dict(val=df_val, test=df_test)

In [2]:
from os import listdir
import re
# Load every cached training data frame ('cache/df_train*') and register it in filename_df
# under the key 'train.<first run of digits in the file name>'.
# NOTE(review): pickle.load executes arbitrary code from the file; only use trusted caches.
for fn in listdir('cache'):
    if not fn.startswith('df_train'):
        continue
    with open('cache/' + fn, mode='rb') as h:
        df_train = pickle.load(h)
    # Raw string: '\d' in a plain literal is an invalid escape sequence (warns on recent Pythons).
    # Raises IndexError if the file name contains no digits.
    filename_df['train.' + re.findall(r'\d+', fn)[0]] = df_train

In [3]:
import cv2
from os.path import join
# Directory that image file names passed to load_image are resolved against.
img_dir = '../pascal12/JPEGImages'
# Mean image, stored channel-first on disk (3*256*256) and cast to float32.
img_mean = np.load('caffe/ilsvrc12_mean.npy').astype(np.float32)
# Move the channel axis to the end: (3, 256, 256) -> (256, 256, 3), i.e. XY[BGR],
# so it can be subtracted directly from an image loaded by cv2.
img_mean = img_mean.swapaxes(0, 1).swapaxes(1, 2)
def load_image(filename):
    """
    Loads an image from img_dir, resizes it to 256*256, subtracts img_mean, then resizes
    the result to 227*227. No axis rotation or normalization.
    Args:
        filename: Image file name, resolved relative to img_dir.
    Returns:
        Result image as an np.float32 array with size 227*227*3. The mean has been
        subtracted, so values may be negative (they are NOT confined to [0, 255]).
    Raises:
        IOError: If the file is missing or cannot be decoded as an image.
    """
    path = join(img_dir, filename)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising; fail here with
        # the offending path instead of an obscure error inside cv2.resize.
        raise IOError('Cannot read image: {}'.format(path))
    img = cv2.resize(img, (256, 256))
    return cv2.resize(img.astype(np.float32) - img_mean, (227, 227))

In [4]:
import h5py
def write_h5(filename, X, Y_leaf, Y_parent=None, Y_hierarchy=None):
    """
    Writes the given arrays to the HDF5 file '../<filename>.h5', opened with mode='w'.
    Datasets 'X' and 'Y_leaf' are always created; 'Y_parent' and 'Y_hierarchy' are
    created only when the corresponding argument is not None.
    """
    target = '../{}.h5'.format(filename)
    optional = (('Y_parent', Y_parent), ('Y_hierarchy', Y_hierarchy))
    with h5py.File(target, mode='w') as out:
        out.create_dataset('X', data=X)
        out.create_dataset('Y_leaf', data=Y_leaf)
        for key, values in optional:
            if values is None:
                continue
            out.create_dataset(key, data=values)

In [5]:
D = 27  # default indicator width; presumably the number of nodes in the label hierarchy -- TODO confirm
def to_sparse(dense, dim=None):
    """
    Converts per-sample index collections into a boolean indicator matrix.
    Args:
        dense: Sized iterable (list, array, pandas Series, ...) whose k-th element holds the
            column indices to switch on in row k.
        dim: Number of columns of the result. Defaults to the module-level D.
    Returns:
        A bool array of shape (len(dense), dim); entry [k, j] is True iff j is among the
        indices of the k-th element of `dense`.
    """
    width = D if dim is None else dim
    sparse = np.zeros((len(dense), width), dtype=bool)
    # Iterate positionally: `dense[i]` is a label lookup on a pandas Series and would hit the
    # wrong row (or raise KeyError) whenever the Series index is not exactly 0..N-1.
    for row, active in enumerate(dense):
        sparse[row, active] = 1
    return sparse

In [6]:
# Build and write one HDF5 file per entry of filename_df.
for fn, df in filename_df.items():
    # Materialize the images into a single array. A bare map() is a lazy iterator on Python 3,
    # which np.swapaxes cannot handle; the list comprehension works on both Python 2 and 3.
    X = np.array([load_image(img_name) for img_name in df['img']])
    X = np.swapaxes(np.swapaxes(X, 2, 3), 1, 2)  # convert to [BGR]XY for Caffe
    Y_leaf = df['label'].astype(float)
    if fn.startswith('train'):
        # Keep the leaf label when it appears in the sample's pseudo labels, otherwise fall
        # back to id_parent[label].
        Y_parent = np.array(
            [l if l in p else id_parent[l] for (l, p) in zip(df['label'], df['pseudo_label'])],
            dtype=float)
        Y_hierarchy = to_sparse(df['pseudo_label']).astype(float)
        write_h5(fn, X, Y_leaf, Y_parent, Y_hierarchy)
    else:
        # Non-training splits only carry the leaf labels.
        write_h5(fn, X, Y_leaf)