Configuration


In [ ]:
# Every tunable of the experiment lives in this one mapping; it is splatted
# (**config) into the data iterator and the prediction helpers below.
config = dict(
    # --- General ---------------------------------------------------------
    batchsize=227,          # objects per mini-batch
    shuffle=True,           # shuffle the object indices before batching

    # --- Augmentations (applied per batch by the data iterator) ----------
    flip=False,             # random sign flip of the first two coordinates
    ascale=True,            # random per-axis scale drawn from [as_min, as_max),
    as_min=0.6667,          # followed by renormalisation to max |coord| == 1
    as_max=1.5,
    rotate=False,           # rotate in the plane of the first two coordinates
    r_positions=12,         # number of discrete rotation angles
    test_pos=None,          # fixed rotation position; None -> random position
    translate=True,         # random shift along every axis ...
    t_rate=0.1,             # ... of up to +-t_rate/2 of the cloud's extent

    # --- Point clouds and kd-trees generation ----------------------------
    steps=10,               # clouds have 2**steps points; also controls the depth of the network
    dim=3,                  # number of point coordinates
    lim=1,                  # forwarded to KDTrees
    det=False,              # forwarded to KDTrees
    gamma=10.,              # forwarded to KDTrees

    # --- NN options ------------------------------------------------------
    input_features='all',   # 'all' for point coordinates, 'no' for feeding 1's as point features
    n_f=[16,
         32, 32,
         64, 64,
         128, 128,
         256, 256,
         512, 128],         # representation sizes (steps + 1 entries)
    n_output=10,            # number of classes
    l2=1e-3,                # weight of the L2 penalty in the loss
    lr=1e-3,                # initial Adam learning rate
    n_ens=16,               # number of passes averaged at prediction time
)

Load Data


In [ ]:
import h5py as h5
import numpy as np

In [ ]:
# Read the whole ModelNet10 dump into memory as NumPy arrays.
#
# Datasets are looked up with hf[name] rather than hf.get(name): a missing
# dataset then raises KeyError right here, whereas hf.get() would return None
# and np.array(None) would silently produce a 0-d object array that only
# fails much later (e.g. on len()).
path2data = "./modelnet10.h5"
with h5.File(path2data, 'r') as hf:
    # Training split
    train_vertices = np.array(hf['train_vertices'])
    train_faces = np.array(hf['train_faces'])
    train_nFaces = np.array(hf['train_nFaces'])
    train_labels = np.array(hf['train_labels'])
    # Test split
    test_vertices = np.array(hf['test_vertices'])
    test_faces = np.array(hf['test_faces'])
    test_nFaces = np.array(hf['test_nFaces'])
    test_labels = np.array(hf['test_labels'])

Build Network


In [ ]:
import theano.sandbox.cuda
theano.sandbox.cuda.use('gpu0')
import theano
import theano.tensor as T

from lasagne.layers import InputLayer, ReshapeLayer, NonlinearityLayer, ExpressionLayer
from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer
from lasagne.layers import DenseLayer
from lasagne.layers.dnn import BatchNormDNNLayer
from lasagne.nonlinearities import rectify, softmax

from lasagne.layers import get_output, get_all_params
from lasagne.regularization import regularize_network_params, l2
from lasagne.objectives import categorical_crossentropy, categorical_accuracy
from lasagne.updates import adam

from lib.nn.layers import SharedDotLayer, SPTNormReshapeLayer
from lib.nn.utils import dump_weights, load_weights

In [ ]:
# Symbolic inputs: the point features, one tensor of split normals per tree
# level, and the integer class labels.
clouds = T.tensor3(dtype='float32')
norms = [T.tensor3(dtype='float32') for level in xrange(config['steps'])]
target = T.vector(dtype='int32')

n_steps = config['steps']
n_feat = config['n_f']

# Every node feeds six branches: one per (axis, sign) pair. Each entry is
# (key suffix, sign argument, axis argument). The order below is the order in
# which layers are created and summed, so it must not be changed.
branches = [('X-', '-', 0), ('Y-', '-', 1), ('Z-', '-', 2),
            ('X+', '+', 0), ('Y+', '+', 1), ('Z+', '+', 2)]
sides = ('l', 'r')

KDNet = {}
# One input channel when constant features are fed, three for coordinates.
in_channels = 1 if config['input_features'] == 'no' else 3
KDNet['input'] = InputLayer((None, in_channels, 2**n_steps), input_var=clouds)

for i in xrange(n_steps):
    lvl = i + 1                          # 1-based level used in the layer names
    n_in, n_out = n_feat[i], n_feat[i+1]
    n_half = 2**(n_steps - 1 - i)        # number of nodes after merging pairs

    # Normals for this level: the right ones are the input, the left ones
    # are their negation.
    KDNet['norm{}_r'.format(lvl)] = InputLayer((None, 3, n_half), input_var=norms[i])
    KDNet['norm{}_l'.format(lvl)] = ExpressionLayer(KDNet['norm{}_r'.format(lvl)], lambda X: -X)

    # Per-branch masks derived from the normals (project layer from lib.nn.layers).
    for side in sides:
        for suffix, sign, axis in branches:
            KDNet['norm{}_{}_{}'.format(lvl, side, suffix)] = SPTNormReshapeLayer(
                KDNet['norm{}_{}'.format(lvl, side)], sign, axis, n_out)

    # Features entering this level: a projection of the raw input on the
    # first level, otherwise the sum of the previous level's masked branches.
    if i == 0:
        KDNet['cloud{}'.format(lvl)] = SharedDotLayer(KDNet['input'], n_in)
    else:
        KDNet['cloud{}'.format(lvl)] = ElemwiseSumLayer(
            [KDNet['cloud{}_{}_{}_masked'.format(i, side, suffix)]
             for side in sides
             for suffix, sign, axis in branches])
    KDNet['cloud{}_bn'.format(lvl)] = BatchNormDNNLayer(KDNet['cloud{}'.format(lvl)])
    KDNet['cloud{}_relu'.format(lvl)] = NonlinearityLayer(KDNet['cloud{}_bn'.format(lvl)], rectify)

    # Split the activations into odd (right) and even (left) positions.
    activated = KDNet['cloud{}_relu'.format(lvl)]
    half_shape = (None, n_in, n_half)
    KDNet['cloud{}_r'.format(lvl)] = ExpressionLayer(activated, lambda X: X[:, :, 1::2], half_shape)
    KDNet['cloud{}_l'.format(lvl)] = ExpressionLayer(activated, lambda X: X[:, :, ::2], half_shape)

    # Left branches own the parameters ...
    for suffix, sign, axis in branches:
        KDNet['cloud{}_l_{}'.format(lvl, suffix)] = SharedDotLayer(
            KDNet['cloud{}_l'.format(lvl)], n_out)
    # ... and the matching right branches reuse the very same W and b.
    for suffix, sign, axis in branches:
        left_branch = KDNet['cloud{}_l_{}'.format(lvl, suffix)]
        KDNet['cloud{}_r_{}'.format(lvl, suffix)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(lvl)], n_out,
            W=left_branch.W,
            b=left_branch.b)

    # Multiply every branch elementwise by its mask.
    for side in sides:
        for suffix, sign, axis in branches:
            KDNet['cloud{}_{}_{}_masked'.format(lvl, side, suffix)] = ElemwiseMergeLayer(
                [KDNet['cloud{}_{}_{}'.format(lvl, side, suffix)],
                 KDNet['norm{}_{}_{}'.format(lvl, side, suffix)]], T.mul)

# Head: merge the last level, normalise, flatten and classify.
KDNet['cloud_fin'] = ElemwiseSumLayer(
    [KDNet['cloud{}_{}_{}_masked'.format(n_steps, side, suffix)]
     for side in sides
     for suffix, sign, axis in branches])
KDNet['cloud_fin_bn'] = BatchNormDNNLayer(KDNet['cloud_fin'])
KDNet['cloud_fin_relu'] = NonlinearityLayer(KDNet['cloud_fin_bn'], rectify)
KDNet['cloud_fin_reshape'] = ReshapeLayer(KDNet['cloud_fin_relu'], (-1, n_feat[-1]))
KDNet['output'] = DenseLayer(KDNet['cloud_fin_reshape'], config['n_output'], nonlinearity=softmax)

# Loading weights (optional)
# load_weights('./models/RT+AS+TR+1e-3_10.pkl', KDNet['output'])

# Class probabilities: `prob` is the default (training-time) output,
# `prob_det` is the output requested with deterministic=True.
net_out = KDNet['output']
prob = get_output(net_out)
prob_det = get_output(net_out, deterministic=True)

# Trainable parameters and the L2 penalty over the network's parameters.
weights = get_all_params(net_out, trainable=True)
l2_pen = regularize_network_params(net_out, l2)

# Objective: mean cross-entropy plus the weighted L2 penalty.
data_term = categorical_crossentropy(prob, target).mean()
loss = data_term + config['l2']*l2_pen
accuracy = categorical_accuracy(prob, target).mean()

# The learning rate is a shared variable so it can be changed between epochs
# with lr.set_value(...) without recompiling.
lr = theano.shared(np.float32(config['lr']))
updates = adam(loss, weights, learning_rate=lr)

# Compiled functions. Both take the clouds followed by one normals tensor per
# tree level; the training function additionally takes the labels.
net_inputs = [clouds] + norms
train_fun = theano.function(net_inputs + [target], [loss, accuracy], updates=updates)
prob_fun = theano.function(net_inputs, prob_det)

Build Data Iterator


In [ ]:
from lib.generators.meshgrid import generate_clouds
from lib.trees.kdtrees import KDTrees

In [ ]:
def iterate_minibatches(*arrays, **kwargs):
    """Yield augmented mini-batches of point clouds with their kd-tree normals.

    Positional arguments (``arrays``):
        arrays[0], arrays[1], arrays[2]: forwarded unchanged to
            ``generate_clouds`` (the notebook passes vertices, faces, nFaces).
            ``len(arrays[2]) - 1`` is taken as the number of objects.
        arrays[3]: labels, indexed with the sampled object indices. Only read
            when ``mode == 'train'``.

    Keyword arguments (the notebook passes ``**config``):
        mode: 'train' samples ``n_objects // batchsize * batchsize`` indices
            with replacement; 'test' visits every object once.
        batchsize, shuffle: batch size and whether to shuffle the indices.
        steps, dim, lim, det, gamma: forwarded to ``generate_clouds`` /
            ``KDTrees``; each cloud has ``2**steps`` points.
        flip; ascale, as_min, as_max; rotate, r_positions, test_pos;
        translate, t_rate: augmentation switches and their parameters.
        input_features: 'all' feeds the sorted coordinates, 'no' feeds ones.

    Yields:
        ``[clouds] + normals[::-1] + [last]`` where ``last`` is the label
        array in 'train' mode and the object indices in 'test' mode.

    Raises:
        ValueError: if ``mode`` or ``input_features`` has an unsupported value.
    """
    mode = kwargs['mode']
    if mode not in ('train', 'test'):
        raise ValueError("mode must be 'train' or 'test', got {!r}".format(mode))
    if kwargs['input_features'] not in ('all', 'no'):
        raise ValueError("input_features must be 'all' or 'no', got {!r}"
                         .format(kwargs['input_features']))

    batchsize = kwargs['batchsize']
    n_objects = len(arrays[2]) - 1

    if mode == 'train':
        # Whole number of full batches, sampled with replacement.
        indices = np.random.choice(n_objects, size=n_objects // batchsize * batchsize)
    else:
        indices = np.arange(n_objects)
    if kwargs['shuffle']:
        np.random.shuffle(indices)

    # range() (not xrange) so the generator runs on both Python 2 and 3.
    for start_idx in range(0, len(indices), batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        points = generate_clouds(excerpt, kwargs['steps'], arrays[0], arrays[1], arrays[2])
        n_clouds = len(points)

        if kwargs['flip']:
            # Independent random sign for each of the first two coordinates.
            draws = np.random.random(size=(n_clouds, 2, 1))
            points[:, :2] *= np.where(draws >= 0.5, 1., -1.)

        if kwargs['ascale']:
            # Random anisotropic scale, then renormalise so max |coord| == 1.
            span = kwargs['as_max'] - kwargs['as_min']
            points *= (kwargs['as_min'] + span*np.random.random(size=(n_clouds, kwargs['dim'], 1)))
            points /= np.fabs(points).max(axis=(1, 2), keepdims=True)

        if kwargs['rotate']:
            if kwargs['test_pos'] is not None:
                alpha = 2*np.pi*kwargs['test_pos']/kwargs['r_positions']
            else:
                # Fixed: the divisor used to be kwargs['positions'], a key the
                # configuration does not define (KeyError).
                alpha = 2*np.pi*np.random.randint(0, kwargs['r_positions'],
                                                  (n_clouds, 1))/kwargs['r_positions']

            cosr = np.cos(alpha)
            sinr = np.sin(alpha)
            # Direct rotation of the first two coordinates. Algebraically the
            # same as the former r*cos(phi + alpha) / r*sin(phi + alpha) form,
            # but without dividing by r, which produced NaN for points with
            # x == y == 0.
            rotated_x = points[:, 0]*cosr - points[:, 1]*sinr
            rotated_y = points[:, 1]*cosr + points[:, 0]*sinr
            points[:, 0] = rotated_x
            points[:, 1] = rotated_y

        if kwargs['translate']:
            # Shift every axis by up to +-t_rate/2 of the cloud's extent.
            mins = points.min(axis=2, keepdims=True)
            maxs = points.max(axis=2, keepdims=True)
            rngs = maxs - mins
            points += kwargs['t_rate']*(np.random.random(size=(n_clouds, kwargs['dim'], 1)) - 0.5)*rngs

        trees_data = KDTrees(points, dim=kwargs['dim'], steps=kwargs['steps'],
                             lim=kwargs['lim'], det=kwargs['det'], gamma=kwargs['gamma'])
        sortings, normals = trees_data['sortings'], trees_data['normals']

        if kwargs['input_features'] == 'all':
            # Reorder the points of every cloud by its kd-tree sorting.
            batch_clouds = np.empty((len(excerpt), kwargs['dim'], 2**kwargs['steps']), dtype=np.float32)
            for i, srt in enumerate(sortings):
                batch_clouds[i] = points[i, :, srt].T
        else:
            batch_clouds = np.ones((len(excerpt), 1, 2**kwargs['steps']), dtype=np.float32)

        # Training batches end with the labels, test batches with the object
        # indices so the caller can scatter predictions back.
        last = arrays[3][excerpt] if mode == 'train' else excerpt
        yield [batch_clouds] + normals[::-1] + [last]

Prediction Functions


In [ ]:
def get_probs(vertices, faces, nFaces, **kwargs):
    """Average the network's class probabilities over several passes.

    Runs ``kwargs['n_ens']`` passes of ``iterate_minibatches`` over the
    data and averages the outputs of ``prob_fun``. Each batch must end with
    the object indices (which is what the iterator yields in 'test' mode);
    they are used to scatter the batch probabilities into place.

    Args:
        vertices, faces, nFaces: forwarded to ``iterate_minibatches``;
            ``len(nFaces) - 1`` is the number of objects.
        **kwargs: forwarded to ``iterate_minibatches``; ``n_output`` and
            ``n_ens`` are also read here.

    Returns:
        float32 array of shape ``(len(nFaces) - 1, n_output)``.
    """
    n_objects = len(nFaces) - 1
    prob_sum = np.zeros((n_objects, kwargs['n_output']), dtype=np.float32)
    for _ in range(kwargs['n_ens']):
        probability = np.zeros((n_objects, kwargs['n_output']), dtype=np.float32)
        for batch in iterate_minibatches(vertices, faces, nFaces, **kwargs):
            # batch == [clouds, normals..., indices]. Unpacking everything but
            # the indices works for any tree depth; the former
            # batch[1]..batch[10] only worked for steps == 10.
            probability[batch[-1]] += prob_fun(*batch[:-1])
        prob_sum += probability
    return prob_sum / kwargs['n_ens']


def acc_fun(vertices, faces, nFaces, labels, **kwargs):
    """Return the classification accuracy in percent.

    The predicted class of an object is the argmax of the probabilities
    returned by ``get_probs``; the result is the percentage of objects whose
    predicted class equals the corresponding entry of ``labels``.
    """
    predicted_classes = get_probs(vertices, faces, nFaces, **kwargs).argmax(axis=1)
    n_correct = (predicted_classes == labels).sum()
    return 100.*n_correct/len(labels)

Train


In [ ]:
from sys import stdout
from time import time

# Start in training mode: iterate_minibatches reads kwargs['mode'] to choose
# between sampled training batches (ending with labels) and a full pass over
# every object (ending with the object indices).
config['mode'] = 'train'

In [ ]:
num_epochs = 300
num_save = 10

train_accuracy = np.zeros(num_epochs, dtype=np.float32)
test_accuracy = np.zeros(num_save, dtype=np.float32)

# lr.set_value(np.float32(1e-4))

# Per-phase copies of the configuration. Passing copies instead of toggling
# config['mode'] in place means an exception during evaluation can no longer
# leave the shared config stuck in 'test' mode.
train_config = dict(config, mode='train')
test_config = dict(config, mode='test')

# Evaluate and checkpoint every `eval_every` epochs (explicit floor division
# so the result is an integer on Python 2 and 3 alike).
eval_every = max(num_epochs // num_save, 1)

# Samples drawn per epoch by iterate_minibatches in 'train' mode: a whole
# number of batches out of len(train_nFaces) - 1 objects.
samples_per_epoch = (len(train_nFaces) - 1) // config['batchsize'] * config['batchsize']

for epoch in range(num_epochs):
    train_err = 0.
    train_acc = 0.
    n_seen = 0

    start_time = time()
    for batch in iterate_minibatches(train_vertices, train_faces, train_nFaces, train_labels,
                                     **train_config):
        # batch == [clouds, normals..., labels], exactly the inputs train_fun
        # was compiled with, so unpacking works for any config['steps'].
        train_err_batch, train_acc_batch = train_fun(*batch)
        batch_len = len(batch[0])
        train_err += train_err_batch*batch_len
        train_acc += train_acc_batch*batch_len
        n_seen += batch_len

        stdout.write('\rEpoch progress: {}/{}\tAccuracy: {:.2f} %\t\tLoss: {:.5f}'
                     .format(n_seen,
                             samples_per_epoch,
                             100*train_acc/n_seen,
                             train_err/n_seen))
        stdout.flush()
    stdout.write('\n')
    stdout.flush()

    # Guard against an epoch that yielded no batch at all (fewer objects than
    # one batch); the sums are zero in that case.
    n_seen = max(n_seen, 1)
    train_accuracy[epoch] = 100*train_acc/np.float32(n_seen)

    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time() - start_time))
    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err/n_seen))
    print("  train accuracy:\t\t{:.2f} %".format(train_accuracy[epoch]))

    if (epoch+1) % eval_every == 0:
        slot = num_save*epoch//num_epochs
        test_accuracy[slot] = acc_fun(test_vertices, test_faces, test_nFaces, test_labels,
                                      **test_config)
        print("  test accuracy:\t\t{:.2f} %".format(test_accuracy[slot]))

        dump_weights('../RT_AS+TR_10.pkl', KDNet['output'])
        print('  Model saved!')

In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline

# Test accuracy is measured after every (num_epochs // num_save)-th epoch,
# i.e. after epochs 30, 60, ..., 300 with the default settings. Plot it at
# those epochs; the previous x positions (0, 30, ..., 270) put every test
# point one evaluation interval too early relative to the training curve.
eval_every = num_epochs // num_save
eval_epochs = eval_every*np.arange(1, num_save + 1)

plt.figure(figsize=(6, 6))
plt.ylim(70, 100)
plt.plot(np.arange(1, num_epochs + 1, 1), train_accuracy, label="train")
plt.plot(eval_epochs, test_accuracy, label="test")

plt.legend(loc="best")
plt.xlabel("epochs")
plt.ylabel("accuracy, %")