Configuration


In [ ]:
# Display names of the 16 shape categories, keyed by category index.
# Each name is right-padded with spaces so that "index digits + name" is always
# 12 characters wide (11-char names for 0-9, 10-char names for 10-15),
# presumably so that printed per-category tables line up.
label2name = {
    index: name.ljust(12 - len(str(index)))
    for index, name in enumerate([
        'Airplane', 'Bag', 'Cap', 'Car',
        'Chair', 'Earphone', 'Guitar', 'Knife',
        'Lamp', 'Laptop', 'Motorbike', 'Mug',
        'Pistol', 'Rocket', 'Skateboard', 'Table',
    ])
}

# Experiment configuration, built section by section.
config = {}

# General
config.update(batchsize=64,    # shapes per mini-batch
              shuffle=True)    # shuffle the shape order before batching

# Augmentations
config.update(flip=False)                               # random sign flip of the first two axes
config.update(ascale=True, as_min=0.6667, as_max=1.5)   # random per-axis scaling factor range
config.update(rotate=False, r_positions=12,             # rotation by multiples of 2*pi/r_positions
              test_pos=None)                            # fixed rotation index; None draws it at random
config.update(translate=True, t_rate=0.1)               # random shift, as a fraction of the per-axis extent

# Point clouds and kd-trees generation
config.update(steps=12,    # also control the depth of the network
              dim=3,
              lim=1,       # lim, det and gamma are passed straight through to KDTrees
              det=False,
              gamma=10.)

# NN options
config.update(
    input_features='all',    # 'all' for point coordinates, 'no' for feeding 1's as point features
    # representation sizes: entries 0..steps are indexed by tree level, the
    # last entry is the size of the dense layer at the root
    n_f=[64] + [64]*4 + [128]*4 + [256]*4 + [512],
    n_mlp=[128, 128],        # mlp sizes
    n_output=50,             # number of per-point output classes
    l2=1e-4,                 # weight of the L2 penalty in the loss
    lr=1e-4,                 # learning rate
    n_ens=16)                # number of prediction passes averaged by get_probs

Load Data


In [ ]:
import h5py as h5
import numpy as np

In [ ]:
def _read_split(h5_file, split):
    """Read the four datasets of one split into in-memory numpy arrays.

    Returns ``(labels, borders, points, point_labels)`` for the datasets named
    ``<split>_labels``, ``<split>_borders``, ``<split>_points`` and
    ``<split>_point_labels``.
    """
    return tuple(np.array(h5_file['{}_{}'.format(split, field)])
                 for field in ('labels', 'borders', 'points', 'point_labels'))


# Everything is copied out of the file inside the `with` block, so the arrays
# stay usable after the file has been closed.
with h5.File('./shapenet_partanno.h5', 'r') as fin:
    train_labels, train_borders, train_points, train_point_labels = _read_split(fin, 'train')
    val_labels, val_borders, val_points, val_point_labels = _read_split(fin, 'val')
    test_labels, test_borders, test_points, test_point_labels = _read_split(fin, 'test')
    
# label2point_labels[c] is the sorted array of distinct per-point labels that
# occur in any shape of category c, across the train, val and test splits.
label2point_labels = {}
for i in range(train_labels.max()+1):
    # Collect the per-shape label slices and stack them once at the end;
    # stacking inside the loop re-copies the growing array for every shape.
    # The empty int8 seed keeps the result dtype the same as an incremental
    # hstack would give and makes categories without shapes map to an empty array.
    chunks = [np.empty((0,), dtype=np.int8)]
    for split_labels, split_borders, split_point_labels in (
            (train_labels, train_borders, train_point_labels),
            (val_labels, val_borders, val_point_labels),
            (test_labels, test_borders, test_point_labels)):
        # Shape `ind` owns the points split_borders[ind]:split_borders[ind+1].
        for ind in (split_labels == i).nonzero()[0]:
            chunks.append(split_point_labels[split_borders[ind]:split_borders[ind+1]])
    label2point_labels[i] = np.unique(np.hstack(chunks))

def _label_frequencies(labels):
    """Return the relative frequency of every distinct value in `labels`.

    Entries follow the sorted order of the distinct values that are actually
    present, so position k equals category k only if every category occurs.
    """
    _, counts = np.unique(labels, return_counts=True)
    return np.float32(counts) / counts.sum()


# Share of each category among the validation / test shapes.
val_label2weight = _label_frequencies(val_labels)
test_label2weight = _label_frequencies(test_labels)

Build Network


In [ ]:
# Raise Python's recursion limit well above the default.
# NOTE(review): presumably needed because the layer graph built below is very
# deep and is traversed / pickled recursively -- confirm before lowering it.
import sys 
sys.setrecursionlimit(100000)

# Select the device 'gpu0' through theano.sandbox.cuda before the rest of the
# Theano imports.
# NOTE(review): the device name is hard-coded; this cell fails on a machine
# without that GPU / CUDA backend.
import theano.sandbox.cuda
theano.sandbox.cuda.use('gpu0')
import theano
import theano.tensor as T

from lasagne.layers import InputLayer, ReshapeLayer, NonlinearityLayer, ExpressionLayer
from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer, ConcatLayer
from lasagne.layers import DenseLayer
from lasagne.layers.dnn import BatchNormDNNLayer
from lasagne.nonlinearities import rectify, softmax

from lasagne.layers import get_output, get_all_params
from lasagne.regularization import regularize_network_params, l2
from lasagne.objectives import categorical_crossentropy, categorical_accuracy
from lasagne.updates import adam

from lib.nn.layers import SharedDotLayer, SPTNormReshapeLayer, SPTUpscaleLayer
from lib.nn.utils import dump_weights, load_weights

In [ ]:
# Symbolic inputs of the network:
#   clouds     -- point features, one float32 3-tensor
#   norms      -- one float32 3-tensor per kd-tree level (config['steps'] of them)
#   prob_masks -- float32 matrix, later used to restrict the softmax per shape
#   target     -- int32 matrix of per-point target labels
clouds = T.tensor3(dtype='float32')
norms = []
for step in xrange(config['steps']):
    norms.append(T.tensor3(dtype='float32'))
prob_masks = T.matrix(dtype='float32')
target = T.matrix(dtype='int32')

# 'no' means a single input feature per point; any other setting feeds 3 features.
n_input_features = 1 if config['input_features'] == 'no' else 3

# All layers are kept in this dict under descriptive string keys.
KDNet = {}
KDNet['input'] = InputLayer((None, n_input_features, 2**config['steps']), input_var=clouds)
# Encoder: one iteration per kd-tree level (level 1 works on all 2**steps
# points, every level halves the number of positions).
#
# A split tag such as 'X-' names a coordinate axis (X/Y/Z -> 0/1/2) and a sign.
# For every level there is one dot-product layer per tag, shared between the
# left and the right child, and its output is multiplied by the matching
# SPTNormReshapeLayer output built from the level's `norms` input.
split_tags = ('X-', 'Y-', 'Z-', 'X+', 'Y+', 'Z+')

for i in xrange(config['steps']):
    level = i + 1
    n_in, n_out = config['n_f'][i], config['n_f'][level]
    n_nodes = 2**(config['steps'] - level)    # positions per child after the split
    norm_key = 'norm{}'.format(level)
    cloud_key = 'cloud{}'.format(level)

    # Normals of this level: the right child uses them as given, the left child negated.
    KDNet[norm_key + '_r'] = InputLayer((None, 3, n_nodes), input_var=norms[i])
    KDNet[norm_key + '_l'] = ExpressionLayer(KDNet[norm_key + '_r'], lambda X: -X)

    for side in 'rl':
        for tag in split_tags:
            KDNet['{}_{}_{}'.format(norm_key, side, tag)] = SPTNormReshapeLayer(
                KDNet['{}_{}'.format(norm_key, side)], tag[1], 'XYZ'.index(tag[0]), n_out)

    # Features entering this level: a shared dot product of the raw input for
    # the first level, otherwise the sum of the 12 masked branches of the
    # previous level (left branches first, then right).
    if i == 0:
        KDNet[cloud_key] = SharedDotLayer(KDNet['input'], n_in)
    else:
        KDNet[cloud_key] = ElemwiseSumLayer(
            [KDNet['cloud{}_{}_{}_masked'.format(i, side, tag)]
             for side in 'lr' for tag in split_tags])
    KDNet[cloud_key + '_bn'] = BatchNormDNNLayer(KDNet[cloud_key])
    KDNet[cloud_key + '_relu'] = NonlinearityLayer(KDNet[cloud_key + '_bn'], rectify)

    # Neighbouring positions are siblings: odd positions go right, even go left.
    KDNet[cloud_key + '_r'] = ExpressionLayer(KDNet[cloud_key + '_relu'],
                                              lambda X: X[:, :, 1::2],
                                              (None, n_in, n_nodes))
    KDNet[cloud_key + '_l'] = ExpressionLayer(KDNet[cloud_key + '_relu'],
                                              lambda X: X[:, :, ::2],
                                              (None, n_in, n_nodes))

    # One dot product per split tag; the left-child layers reuse W and b of
    # the right-child layers.
    for tag in split_tags:
        KDNet['{}_r_{}'.format(cloud_key, tag)] = SharedDotLayer(KDNet[cloud_key + '_r'], n_out)
    for tag in split_tags:
        right_dot = KDNet['{}_r_{}'.format(cloud_key, tag)]
        KDNet['{}_l_{}'.format(cloud_key, tag)] = SharedDotLayer(KDNet[cloud_key + '_l'], n_out,
                                                                 W=right_dot.W, b=right_dot.b)

    # Multiply every branch element-wise with the matching normal-derived layer.
    for side in 'rl':
        for tag in split_tags:
            KDNet['{}_{}_{}_masked'.format(cloud_key, side, tag)] = ElemwiseMergeLayer(
                [KDNet['{}_{}_{}'.format(cloud_key, side, tag)],
                 KDNet['{}_{}_{}'.format(norm_key, side, tag)]], T.mul)
    
# Bottleneck at the root of the tree.
last_level = config['steps']
n_root = config['n_f'][-2]

# Sum of the 12 masked branches of the last encoder level (left first, then right).
KDNet['cloud_fin'] = ElemwiseSumLayer(
    [KDNet['cloud{}_{}_{}_masked'.format(last_level, side, tag)]
     for side in 'lr'
     for tag in ('X-', 'Y-', 'Z-', 'X+', 'Y+', 'Z+')])
KDNet['cloud_fin_bn'] = BatchNormDNNLayer(KDNet['cloud_fin'])
KDNet['cloud_fin_relu'] = NonlinearityLayer(KDNet['cloud_fin_bn'], rectify)
# Flatten to a matrix with n_f[-2] columns for the dense layer.
KDNet['cloud_fin_reshape'] = ReshapeLayer(KDNet['cloud_fin_relu'], (-1, n_root))

# Global descriptor of the shape.
KDNet['fin'] = DenseLayer(KDNet['cloud_fin_reshape'], config['n_f'][-1])

# Start of the decoder: linear projection back to n_f[-2] features, restored
# to a 3-tensor with a trailing axis of length 1, then batch-norm + ReLU.
KDNet['inv_cloud_fin_raw_reshape'] = DenseLayer(KDNet['fin'], n_root, nonlinearity=None)
KDNet['inv_cloud_fin_raw'] = ReshapeLayer(KDNet['inv_cloud_fin_raw_reshape'], (-1, n_root, 1))
KDNet['inv_cloud_fin_raw_bn'] = BatchNormDNNLayer(KDNet['inv_cloud_fin_raw'])
KDNet['inv_cloud_fin_raw_relu'] = NonlinearityLayer(KDNet['inv_cloud_fin_raw_bn'], rectify)

# Skip connection from the encoder root.
KDNet['add_cloud_fin'] = SharedDotLayer(KDNet['cloud_fin_relu'], n_root)
KDNet['add_cloud_fin_bn'] = BatchNormDNNLayer(KDNet['add_cloud_fin'])
KDNet['add_cloud_fin_relu'] = NonlinearityLayer(KDNet['add_cloud_fin_bn'], rectify)

# Decoder features and skip features are concatenated along the feature axis.
KDNet['inv_cloud_fin'] = ConcatLayer([KDNet['inv_cloud_fin_raw_relu'],
                                      KDNet['add_cloud_fin_relu']], axis=1)

# Decoder: walks the tree levels in reverse (level = steps, ..., 1).  At every
# level the parent features go through one dot product per split tag (weights
# shared between the two children), are multiplied by the layers derived from
# that level's normals, summed per child, upscaled to the child positions and
# finally concatenated with a skip connection from the encoder.
split_tags = ('X-', 'Y-', 'Z-', 'X+', 'Y+', 'Z+')

for i in xrange(config['steps']):
    level = config['steps'] - i
    n_out = config['n_f'][level - 1]
    inv_key = 'inv_cloud{}'.format(level)
    inv_norm_key = 'inv_norm{}'.format(level)
    # The first iteration starts from the bottleneck, later ones from the level above.
    parent = KDNet['inv_cloud_fin' if i == 0 else 'inv_cloud{}'.format(level + 1)]

    # Same normals as in the encoder, reshaped for this level's feature size.
    for side in 'rl':
        for tag in split_tags:
            KDNet['{}_{}_{}'.format(inv_norm_key, side, tag)] = SPTNormReshapeLayer(
                KDNet['norm{}_{}'.format(level, side)], tag[1], 'XYZ'.index(tag[0]), n_out)

    # Per-tag dot products of the parent; the left-child layers reuse W and b
    # of the right-child layers.
    for tag in split_tags:
        KDNet['{}_r_{}'.format(inv_key, tag)] = SharedDotLayer(parent, n_out)
    for tag in split_tags:
        right_dot = KDNet['{}_r_{}'.format(inv_key, tag)]
        KDNet['{}_l_{}'.format(inv_key, tag)] = SharedDotLayer(parent, n_out,
                                                               W=right_dot.W, b=right_dot.b)

    for side in 'rl':
        for tag in split_tags:
            KDNet['{}_{}_{}_masked'.format(inv_key, side, tag)] = ElemwiseMergeLayer(
                [KDNet['{}_{}_{}'.format(inv_key, side, tag)],
                 KDNet['{}_{}_{}'.format(inv_norm_key, side, tag)]], T.mul)

    # Features of the right and of the left child.
    for side in 'rl':
        KDNet['{}_{}'.format(inv_key, side)] = ElemwiseSumLayer(
            [KDNet['{}_{}_{}_masked'.format(inv_key, side, tag)] for tag in split_tags])

    # Put both children back side by side and normalise.
    KDNet[inv_key + '_r_up'] = SPTUpscaleLayer(KDNet[inv_key + '_r'], mode='right')
    KDNet[inv_key + '_l_up'] = SPTUpscaleLayer(KDNet[inv_key + '_l'], mode='left')
    KDNet[inv_key + '_raw'] = ElemwiseSumLayer([KDNet[inv_key + '_r_up'],
                                                KDNet[inv_key + '_l_up']])
    KDNet[inv_key + '_raw_bn'] = BatchNormDNNLayer(KDNet[inv_key + '_raw'])
    KDNet[inv_key + '_raw_relu'] = NonlinearityLayer(KDNet[inv_key + '_raw_bn'], rectify)

    # Skip connection from the encoder activations of the same level.
    add_key = 'add_cloud{}'.format(level)
    KDNet[add_key] = SharedDotLayer(KDNet['cloud{}_relu'.format(level)], n_out)
    KDNet[add_key + '_bn'] = BatchNormDNNLayer(KDNet[add_key])
    KDNet[add_key + '_relu'] = NonlinearityLayer(KDNet[add_key + '_bn'], rectify)

    KDNet[inv_key] = ConcatLayer([KDNet[inv_key + '_raw_relu'],
                                  KDNet[add_key + '_relu']], axis=1)

# Per-point classification head: a stack of (shared dot product -> batch norm
# -> ReLU) blocks on top of the decoder output, followed by the output layer.
#
# Every block must consume the *activated* output of the previous block.
# Feeding 'mlp<i>' (the raw dot product) into block i+1 instead would leave
# 'mlp<i>_bn' / 'mlp<i>_relu' disconnected from the output and chain two
# linear layers back to back.
# NOTE(review): checkpoints saved from a network wired that way have a
# different parameter list and will not load into this one.
head = KDNet['inv_cloud1']
for i in range(len(config['n_mlp'])):
    KDNet['mlp{}'.format(i+1)] = SharedDotLayer(head, config['n_mlp'][i])
    KDNet['mlp{}_bn'.format(i+1)] = BatchNormDNNLayer(KDNet['mlp{}'.format(i+1)])
    KDNet['mlp{}_relu'.format(i+1)] = NonlinearityLayer(KDNet['mlp{}_bn'.format(i+1)], rectify)
    head = KDNet['mlp{}_relu'.format(i+1)]

# With an empty config['n_mlp'] the output layer sits directly on the decoder.
KDNet['final'] = SharedDotLayer(head, config['n_output'])

# Loading weights (optional)
# load_weights('./models/RT+AS+TR+1e-4_10.pkl', KDNet['final'])

# Class mask with an extra trailing axis so that it broadcasts over the last
# axis of the network output.
class_mask = prob_masks.dimshuffle(0, 1, 'x')

# Masked softmax over the class axis (axis 1): classes whose mask entry is 0
# get probability 0, the rest are renormalised.
# NOTE(review): the stabilising shift is the max of `final*class_mask`, where
# masked-out classes contribute 0 rather than being ignored; very large logits
# of masked-out classes (or very negative logits of all allowed classes) could
# therefore overflow / underflow the exponent -- confirm this cannot happen.
final = get_output(KDNet['final'])
final_exp = T.exp(final - (final*class_mask).max(axis=1, keepdims=True))
final_exp_masked = final_exp*class_mask
probs = final_exp_masked / final_exp_masked.sum(axis=1, keepdims=True)

# Same computation on the deterministic forward pass, used for prediction.
final_det = get_output(KDNet['final'], deterministic=True)
final_det_exp = T.exp(final_det - (final_det*class_mask).max(axis=1, keepdims=True))
final_det_exp_masked = final_det_exp*class_mask
probs_det = final_det_exp_masked / final_det_exp_masked.sum(axis=1, keepdims=True)

weights = get_all_params(KDNet['final'], trainable=True)
l2_pen = regularize_network_params(KDNet['final'], l2)

# One-hot encoding of the targets along axis 1, laid out like `probs`.
class_ids = np.arange(config['n_output'], dtype=np.int32).reshape(1, -1, 1)
tiled_class_ids = T.tile(class_ids, (target.shape[0], 1, target.shape[1]))
target_shaped = T.cast(T.eq(tiled_class_ids, target.dimshuffle(0, 'x', 1)), 'float32')

# Cross-entropy averaged over points and then over the batch, plus the L2 penalty.
point_nll = -T.sum(target_shaped*T.log(probs + 1e-8), axis=1)
loss = T.mean(point_nll, axis=1).mean() + config['l2']*l2_pen
# Number (not fraction) of correctly labelled points in the batch.
acc = T.cast(T.eq(T.argmax(probs, axis=1), target), 'int32').sum()

# The learning rate is a shared variable, so it can be changed after compilation.
lr = theano.shared(np.float32(config['lr']))
updates = adam(loss, weights, learning_rate=lr)

net_inputs = [clouds] + norms + [prob_masks]
train_fun = theano.function(net_inputs + [target], [loss, acc], updates=updates)
prob_fun = theano.function(net_inputs, probs_det)

Build Data Iterator


In [ ]:
from copy import copy

from lib.generators.pointcloud import generate_clouds
from lib.trees.kdtrees import KDTrees

In [ ]:
def Leaves2Points(cloud, idx, normals, medians, steps=10):
    """Assign every point of `cloud` to a leaf of an already built kd-tree.

    The tree is descended level by level.  At node `i` of level `step` a point
    goes to the second ("right") child when its projection onto
    ``normals[step][idx, :, i]`` is at least ``medians[step][idx, i]``, and to
    the first ("left") child otherwise.

    Args:
        cloud: array of shape (dim, n_points) with point coordinates.
        idx: index of the tree inside the batch-wise `normals` / `medians`.
        normals: sequence with one array per level, indexed as
            ``normals[step][idx, :, node]`` (root level first).
        medians: sequence with one array per level, indexed as
            ``medians[step][idx, node]`` (root level first).
        steps: number of tree levels to descend.

    Returns:
        A list of ``2**steps`` integer index arrays into the point axis of
        `cloud`, one per leaf in left-to-right order.  A leaf may be empty.
    """
    groups = [np.arange(cloud.shape[1])]

    # `range` (rather than `xrange`) keeps the function usable on Python 2 and 3.
    for step in range(steps):
        next_groups = []
        for node, members in enumerate(groups):
            direction = normals[step][idx, :, node].reshape(-1, 1)
            projection = (direction*cloud[:, members]).sum(axis=0)
            goes_right = medians[step][idx, node] <= projection
            # Children are stored next to each other: left first, then right.
            next_groups.append(members[np.logical_not(goes_right)])
            next_groups.append(members[goes_right])
        groups = next_groups

    return groups


def _rotate_xy(x, y, cos_a, sin_a):
    """Return (x, y) rotated about the third axis by the angle with the given cosine and sine."""
    return x*cos_a - y*sin_a, y*cos_a + x*sin_a


def iterate_minibatches(*arrays, **kwargs):
    """Yield augmented mini-batches of kd-tree-ordered point clouds.

    Positional arguments (``arrays``):
        arrays[0]: point coordinates of all shapes, one point per row.
        arrays[1]: borders; shape ``k`` owns rows ``borders[k]:borders[k+1]``.
        arrays[2]: per-shape category labels.
        arrays[3]: per-point labels (only read when ``mode == 'train'``).

    Keyword arguments: ``mode`` ('train' or 'test') plus the keys of the
    notebook's ``config`` dict (batchsize, shuffle, the augmentation switches
    and their parameters, steps, dim, lim, det, gamma, input_features,
    n_output).

    In 'train' mode shapes are drawn at random with replacement and only full
    batches are produced; each batch is
    ``[clouds] + normals[::-1] + [prob_masks, clouds_point_labels]``.

    In 'test' mode every shape is visited exactly once; each batch is
    ``[clouds] + normals[::-1] + [prob_masks, mappings, excerpt]`` where
    ``mappings[j]`` is the Leaves2Points result for *all* original points of
    shape ``excerpt[j]`` (after the same augmentation as the sampled cloud).

    Raises:
        ValueError: if ``mode`` is neither 'train' nor 'test'.
    """
    mode = kwargs['mode']
    batchsize = kwargs['batchsize']
    n_shapes = len(arrays[1]) - 1

    if mode == 'train':
        # Explicit floor division: only whole batches are drawn.
        indices = np.random.choice(n_shapes, size=n_shapes // batchsize * batchsize)
    elif mode == 'test':
        indices = np.arange(n_shapes)
    else:
        raise ValueError("mode must be 'train' or 'test', got {!r}".format(mode))
    if kwargs['shuffle']:
        np.random.shuffle(indices)

    use_coords = kwargs['input_features'] == 'all'

    for start_idx in range(0, len(indices), batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        # tmp holds the sampled clouds, indexed (shape, axis, point);
        # smpl presumably holds the indices of the sampled points -- see generate_clouds.
        tmp, smpl = generate_clouds(excerpt, kwargs['steps'], arrays[0], arrays[1])
        labels = arrays[2][excerpt]

        # 1 for every per-point label that can occur in the shape's category.
        prob_masks = np.zeros((len(excerpt), kwargs['n_output']), dtype=np.float32)
        for i, label in enumerate(labels):
            prob_masks[i][label2point_labels[label]] = 1.

        if kwargs['flip']:
            flip = np.random.random(size=(len(tmp), 2, 1))
            flip[flip >= 0.5] = 1.
            flip[flip < 0.5] = -1.
            tmp[:, :2] *= flip

        if kwargs['ascale']:
            ascale = (kwargs['as_min'] + (kwargs['as_max'] - kwargs['as_min'])*np.random.random(size=(len(tmp), kwargs['dim'], 1)))
            tmp *= ascale
            tmp /= np.fabs(tmp).max(axis=(1, 2), keepdims=True)

        if kwargs['rotate']:
            if kwargs['test_pos'] is not None:
                alpha = 2*np.pi*kwargs['test_pos']/kwargs['r_positions']
            else:
                alpha = 2*np.pi*np.random.randint(0, kwargs['r_positions'], (len(tmp), 1))/kwargs['r_positions']

            cosr = np.cos(alpha)
            sinr = np.sin(alpha)
            # Rotate the coordinates directly; going through x/r and y/r would
            # produce NaN for points that lie on the rotation axis (r == 0).
            tmp[:, 0], tmp[:, 1] = _rotate_xy(tmp[:, 0], tmp[:, 1], cosr, sinr)

        if kwargs['translate']:
            mins = tmp.min(axis=2, keepdims=True)
            maxs = tmp.max(axis=2, keepdims=True)
            rngs = maxs - mins
            translate = kwargs['t_rate']*(np.random.random(size=(len(tmp), kwargs['dim'], 1)) - 0.5)*rngs
            tmp += translate

        # Medians are only needed in test mode, to map original points to leaves.
        trees_data = KDTrees(tmp, dim=kwargs['dim'], steps=kwargs['steps'],
                             lim=kwargs['lim'], det=kwargs['det'], gamma=kwargs['gamma'],
                             medians=False if mode == 'train' else True)

        sortings, normals, medians = trees_data['sortings'], trees_data['normals'], trees_data.get('medians', None)

        clouds_shape = (len(excerpt), 3 if use_coords else 1, 2**kwargs['steps'])
        if use_coords:
            # Point coordinates, reordered into kd-tree leaf order.
            clouds = np.empty(clouds_shape, dtype=np.float32)
            for i, srt in enumerate(sortings):
                clouds[i] = tmp[i, :, srt].T
        else:
            # No coordinates requested: feed a constant 1 as the only point feature.
            clouds = np.ones(clouds_shape, dtype=np.float32)

        if mode == 'train':
            tmp_point_labels = arrays[3][smpl]
            clouds_point_labels = np.empty((len(excerpt), 2**kwargs['steps']), dtype=np.int32)
            for i, srt in enumerate(sortings):
                clouds_point_labels[i] = tmp_point_labels[i, srt]

            yield [clouds] + normals[::-1] + [prob_masks, clouds_point_labels]

        if mode == 'test':
            mappings = []
            for i, idx in enumerate(excerpt):
                # All original points of the shape, as (axis, point), put
                # through the same augmentation as its sampled cloud.
                pcoord4mapping = np.transpose(arrays[0][arrays[1][idx]:arrays[1][idx+1], :].copy(), (1, 0))

                if kwargs['flip']:
                    pcoord4mapping[:2] *= flip[i]

                if kwargs['ascale']:
                    pcoord4mapping *= ascale[i]
                    pcoord4mapping /= np.fabs(pcoord4mapping).max()

                if kwargs['rotate']:
                    # The tree was built on rotated clouds, so the points that
                    # are pushed through it have to be rotated as well.
                    cos_i = cosr if np.ndim(cosr) == 0 else cosr[i]
                    sin_i = sinr if np.ndim(sinr) == 0 else sinr[i]
                    pcoord4mapping[0], pcoord4mapping[1] = _rotate_xy(
                        pcoord4mapping[0], pcoord4mapping[1], cos_i, sin_i)

                if kwargs['translate']:
                    pcoord4mapping += translate[i]

                mappings.append(
                    Leaves2Points(pcoord4mapping.astype(np.float32), i, normals, medians, steps=kwargs['steps']))

            yield [clouds] + normals[::-1] + [prob_masks, mappings, excerpt]

Prediction function


In [ ]:
from sklearn.metrics import confusion_matrix


def get_probs(coordinates, borders, labels, **kwargs):
    """Average per-point class scores over an ensemble of forward passes.

    Parameters
    ----------
    coordinates, borders, labels
        Forwarded unchanged to ``iterate_minibatches``. ``borders`` is also
        used here: shape ``i`` owns ``borders[i+1] - borders[i]`` points.
    **kwargs
        Configuration forwarded to ``iterate_minibatches``. This function
        itself reads ``kwargs['n_output']`` (number of classes per point) and
        ``kwargs['n_ens']`` (number of ensemble passes).
        NOTE(review): the batches must end with ``[..., mappings, excerpt]``,
        which is the layout produced when ``kwargs['mode'] == 'test'`` --
        confirm callers always set that mode.

    Returns
    -------
    list of numpy arrays
        One ``(n_points_i, n_output)`` array per shape, holding the scores
        accumulated over all passes divided by ``n_ens``.
    """
    # One zero-initialised accumulator per shape.
    predictions = [np.zeros((borders[i+1]-borders[i], kwargs['n_output']), dtype=np.float32)
                   for i in range(len(borders)-1)]

    for _ in range(kwargs['n_ens']):
        for batch in iterate_minibatches(coordinates, borders, labels, **kwargs):
            # The last two entries are bookkeeping (point mappings and the
            # indices of the shapes in this batch); everything before them is
            # network input. Unpacking instead of spelling out batch[0..13]
            # keeps the call valid when the number of inputs changes.
            mappings, excerpt = batch[-2], batch[-1]
            probs = prob_fun(*batch[:-2])

            for j, mapping in enumerate(mappings):
                for k, inds in enumerate(mapping):
                    # Add the score vector at position k of item j to every
                    # original point listed in `inds`.
                    predictions[excerpt[j]][inds] += probs[j, :, k].reshape(1, -1)

    return [pred/kwargs['n_ens'] for pred in predictions]


def acc_fun(borders, point_labels, probs):
    """Return the overall point-wise accuracy in percent.

    ``probs[i]`` holds the per-point class scores of shape ``i``; its ground
    truth is the slice ``point_labels[borders[i]:borders[i+1]]``. A point is
    counted as correct when the arg-max of its score row equals its label.
    """
    correct = 0.
    total = 0.

    for shape_idx, shape_probs in enumerate(probs):
        start, stop = borders[shape_idx], borders[shape_idx+1]
        truth = point_labels[start:stop]
        hits = shape_probs.argmax(1) == truth
        correct += hits.sum()
        total += len(truth)

    return 100.*correct/total


def label_acc_fun(borders, labels, point_labels, probs):
    """Return the point-wise accuracy in percent for each of the 16 categories.

    Shape ``i`` belongs to category ``labels[i]``; its ground truth is
    ``point_labels[borders[i]:borders[i+1]]`` and its predictions are the
    row-wise arg-max of ``probs[i]``. The result is a length-16 array; a
    category with no shapes gets 0/0, i.e. NaN.
    """
    correct_per_label = np.zeros(16, dtype=np.int32)
    total_per_label = np.zeros(16, dtype=np.int32)

    for shape_idx, shape_probs in enumerate(probs):
        category = labels[shape_idx]
        truth = point_labels[borders[shape_idx]:borders[shape_idx+1]]
        correct_per_label[category] += (shape_probs.argmax(1) == truth).sum()
        total_per_label[category] += len(shape_probs)

    return 100. * correct_per_label / total_per_label


def mIoU_fun(borders, labels, point_labels, probs):
    """Return the mean part IoU in percent for each of the 16 categories.

    For shape ``i`` (category ``labels[i]``) a confusion matrix is built
    between ``point_labels[borders[i]:borders[i+1]]`` and the row-wise arg-max
    of ``probs[i]``, restricted to the part labels listed in
    ``label2point_labels[labels[i]]``. The IoU of every part is averaged into
    one score per shape; a part whose union is empty (neither present nor
    predicted) counts as 100. Shape scores are then averaged per category.

    Returns
    -------
    numpy array of length 16
        Mean IoU per category. A category with no shapes in ``labels`` yields
        NaN, the same value ``label_acc_fun`` gives for such a category.
    """
    sIoU = np.zeros(16, dtype=np.float32)

    # np.unique only reports categories that actually occur, so its counts
    # cannot be divided into the 16-slot accumulator directly: scatter them
    # into a fixed-length array indexed by category instead.
    present, counts = np.unique(labels, return_counts=True)
    label_counters = np.zeros(16, dtype=counts.dtype)
    label_counters[present] = counts

    for i, prob in enumerate(probs):
        C = confusion_matrix(point_labels[borders[i]:borders[i+1]], prob.argmax(1),
                             labels=label2point_labels[labels[i]])

        intersections = np.diag(C)
        unions = (C.sum(axis=0) + C.sum(axis=1) - intersections)

        sIoU_buf = 0.
        for j, union in enumerate(unions):
            if union == 0:
                # Part is absent from both ground truth and prediction.
                sIoU_buf += 100.
            else:
                sIoU_buf += 100.*np.float32(intersections[j])/union

        sIoU[labels[i]] += sIoU_buf/C.shape[0]

    # 0/0 for absent categories is intentional (NaN); silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return sIoU / label_counters

Train


In [ ]:
from sys import stdout
from time import time

# Select the training batch layout: the batch generator checks `mode` and, for
# 'train', yields the per-point labels as the last element of every batch.
config['mode'] = 'train'

In [ ]:
num_epochs = 100  # total passes over the training set
num_save = 5      # number of validation + checkpoint events over the run

# Optional (presumably resets the learning rate before training -- confirm):
# lr.set_value(np.float32(1e-4))

for epoch in range(num_epochs):
    train_err = 0.
    train_acc = 0.

    start_time = time()
    for i, batch in enumerate(iterate_minibatches(train_points, train_borders,
                                                  train_labels, train_point_labels, **config)):
        # Every element of a training batch is an input of train_fun, so
        # unpack the batch instead of hard-coding the number of arguments.
        train_err_batch, train_acc_batch = train_fun(*batch)
        train_err += train_err_batch*len(batch[0])
        train_acc += train_acc_batch

        n_seen = config['batchsize']*(i+1)
        stdout.write('\rEpoch progress: {}/{}\tAccuracy: {:.2f} %\t\tLoss: {:.5f}'
                     .format(n_seen,
                             len(train_labels)//config['batchsize']*config['batchsize'],
                             100*train_acc/(2**config['steps'])/n_seen,
                             train_err/n_seen))
        stdout.flush()
    stdout.write('\n')
    stdout.flush()

    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time() - start_time))
    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err/(config['batchsize']*(i+1))))
    print("  train accuracy:\t\t{:.2f} %".format(100*train_acc/(2**config['steps'])/(config['batchsize']*(i+1))))

    # Validate and checkpoint every num_epochs//num_save epochs.
    if (epoch+1) % (num_epochs//num_save) == 0:
        config['mode'] = 'test'
        try:
            val_probs = get_probs(val_points, val_borders, val_labels, **config)
            val_acc = acc_fun(val_borders, val_point_labels, val_probs)
            val_label2acc = label_acc_fun(val_borders, val_labels, val_point_labels, val_probs)
            val_mIoU = mIoU_fun(val_borders, val_labels, val_point_labels, val_probs)
        finally:
            # Always return to training mode, even if validation fails, so a
            # re-run of this cell does not train on test-mode batches.
            config['mode'] = 'train'

        print("  val accuracy:\t\t\t{:.2f}%".format(val_acc))
        print("  val category accuracy:")
        for j in range(16):
            print("  \t{}\t\t{:.2f}%".format(label2name[j], val_label2acc[j]))
        print("  \tMEAN\t\t\t{:.2f}%".format(val_label2acc.mean()))
        print("  val mIoU:")
        for j in range(16):
            print("  \t{}\t\t{:.2f}%".format(label2name[j], val_mIoU[j]))
        print("  \tMEAN\t\t\t{:.2f}%".format((val_mIoU*val_label2weight).sum()))

        dump_weights('./models/RT_AS+TR_10.pkl', KDNet['final'])
        print('  Model saved!')

In [ ]: