adversarial-DNN

Author: Justin Tan

Adversarial training for robustness against systematic error.


In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import glob, time, os
import selu
from diagnostics import *

class config(object):
    # Set network parameters
    mode = 'adv-selu'
    channel = 'B2Xsy'
    keep_prob = 0.95
    num_epochs = 512
    batch_size = 256
    n_layers = 7
    adv_n_layers = 3
    adv_keep_prob = 0.98
    hidden_layer_nodes = [1024, 1024, 512, 512, 512, 256, 256]
    adv_hidden_nodes = [256,512,256]
    n_gaussians = 5
    ema_decay = 0.999
    learning_rate = 2.5e-5
    adv_learning_rate = 0.001
    adv_lambda = 1.0
    cycles = 3 # Number of annealing cycles
    n_classes = 2
    adv_n_classes = 10
    epsilon = 1e-8
    builder = 'selu'
    K = 3
    adversary = True
    pivots = ['deltae', 'mbc']

class directories(object):
    train = '/data/projects/punim0011/jtan/spark/spark2tf/cont_b2sy_By_val.parquet'# '/home/jtan/gpu/jtan/spark/spark2tf/example_train.tfrecords'
    test = '/data/projects/punim0011/jtan/spark/spark2tf/cont_b2sy_By_test.parquet'#'/home/jtan/gpu/jtan/spark/spark2tf/example_test.tfrecords'
    val = '/data/projects/punim0011/jtan/spark/spark2tf/cont_b2sy_By_train.parquet'#'/home/jtan/gpu/jtan/spark/spark2tf/example_test.tfrecords'
    tensorboard = 'tensorboard'
    checkpoints = 'checkpoints'
    best_checkpoints = 'best' 

architecture = '{} - {} | Layers: {} | Dropout: {} | Base LR: {} | Epochs: {}'.format(
    config.channel, config.mode, config.n_layers, config.keep_prob, config.learning_rate, config.num_epochs)
get_available_gpus()

def load_parquet(datasetName, subsample=False, dropFrac=0.75):
    from sklearn.model_selection import train_test_split
    
    excludeFeatures = ['labels', 'mbc', 'deltae', 'daughterInvM', 'nCands', 'evtNum', 'MCtype', 'channel']
    dataset = pq.ParquetDataset(datasetName)
    pdf = dataset.read(nthreads=4).to_pandas()
    pdf = pdf.sample(frac=1).reset_index(drop=True)
    features = pdf.drop(excludeFeatures, axis=1)
    labels = pdf['labels'].astype(np.int32)
    auxillary = pdf[['deltae', 'mbc']]
    auxillary=auxillary.assign(dE_labels=pd.qcut(auxillary['deltae'], q=config.adv_n_classes, labels=False), 
                               mbc_labels=pd.qcut(auxillary['mbc'], q=config.adv_n_classes, labels=False))
    
    return features.values.astype(np.float32), labels.values, auxillary.values.astype(np.float32), pdf


Available GPUs:
['/device:GPU:0', '/device:GPU:1', '/device:GPU:2', '/device:GPU:3']

In [2]:
features, labels, auxillary, pdf = load_parquet(directories.train)
featuresTest, labelsTest, auxillaryTest, pdf_test = load_parquet(directories.test)
config.nTrainExamples, config.nFeatures = features.shape[0], features.shape[-1]
config.steps_per_epoch = features.shape[0] // config.batch_size

In [3]:
def dataset_placeholder(features_placeholder, labels_placeholder, batchSize, numEpochs, training=True):  
    dataset = tf.contrib.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
    dataset = dataset.shuffle(buffer_size=16384)
    dataset = dataset.batch(batchSize)
    dataset = dataset.repeat(numEpochs) if training else dataset

    return dataset

def dataset_placeholder_aux(features_placeholder, labels_placeholder, auxillary_placeholder, batchSize, numEpochs, training=True, shuffle=True):  
    dataset = tf.contrib.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder, auxillary_placeholder))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=16384)
    dataset = dataset.batch(batchSize)
    dataset = dataset.repeat(numEpochs) if training else dataset

    return dataset

def dataset_placeholder_plot(features_placeholder, labels_placeholder, auxillary_placeholder, batchSize=51200, training=False):  
    dataset = tf.contrib.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder, auxillary_placeholder))
    dataset = dataset.batch(int(features_placeholder.get_shape()[0]))
    dataset = dataset.repeat()

    return dataset

def dataset_single_epoch(features, labels, auxillary, batchSize, training=True):
    dataset = tf.contrib.data.Dataset.from_tensor_slices((features, labels, auxillary))
    dataset = dataset.shuffle(buffer_size=25600)
    dataset = dataset.batch(batchSize)
    
    return dataset

def dataset_train(dataDirectory, batchSize, numEpochs, nFeatures, training=True):
    filenames = glob.glob('{}/part*'.format(dataDirectory))
    dataset = tf.contrib.data.TFRecordDataset(filenames)

    # Extract data from `tf.Example` protocol buffer
    def parser(record, batchSize=128):
        keys_to_features = {
            "features": tf.FixedLenFeature([nFeatures], tf.float32),
            "labels": tf.FixedLenFeature((), tf.float32,
            default_value=tf.zeros([], dtype=tf.float32)),
#             "mbc": tf.FixedLenFeature((), tf.float32, dtype=tf.float32),
#             "deltae": tf.FixedLenFeature((), tf.float32, dtype=tf.float32)
        }
        parsed = tf.parse_single_example(record, keys_to_features)
        label = tf.cast(parsed['labels'], tf.int32)

        return parsed['features'], label

    # Transform into feature, label tensor pair
    dataset = dataset.map(parser)
    dataset = dataset.shuffle(buffer_size=16384)
    dataset = dataset.batch(batchSize)
    dataset = dataset.repeat(numEpochs) if training else dataset

    return dataset

def dense_builder(x, shape, name, keep_prob, training=True, actv=tf.nn.elu):
    init=tf.contrib.layers.xavier_initializer()
    kwargs = {'center': True, 'scale': True, 'training': training, 'fused': True, 'renorm': True}

    with tf.variable_scope(name, initializer=init) as scope:
        layer = tf.layers.dense(x, units=shape[1], activation=actv)
        bn = tf.layers.batch_normalization(layer, **kwargs)
        layer_out = tf.layers.dropout(bn, 1-keep_prob, training=training)

    return layer_out

def selu_builder(x, shape, name, keep_prob, training=True):
    init = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')

    with tf.variable_scope(name) as scope:
        W = tf.get_variable("weights", shape = shape, initializer=init)
        b = tf.get_variable("biases", shape = [shape[1]], initializer=tf.random_normal_initializer(stddev=0.01))
        actv = selu.selu(tf.add(tf.matmul(x, W), b))
        layer_output = selu.dropout_selu(actv, rate=1-keep_prob, training=training)

    return layer_output

def dense_model(x, n_layers, hidden_layer_nodes, keep_prob, n_input, n_classes, scope, builder=selu_builder, reuse=False, training=True):
    # Extensible dense model
    SELU_initializer = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')
    init = SELU_initializer if builder==selu_builder else tf.contrib.layers.xavier_initializer()
    assert n_layers == len(hidden_layer_nodes), 'Specified layer nodes and number of layers do not correspond.'
    layers = [x]

    with tf.variable_scope(scope, reuse=reuse):
        hidden_0 = builder(x, shape=[n_input, hidden_layer_nodes[0]], name='hidden0',
                                keep_prob = keep_prob, training=training)
        layers.append(hidden_0)
        for n in range(0,n_layers-1):
            hidden_n = builder(layers[-1], shape=[hidden_layer_nodes[n], hidden_layer_nodes[n+1]], name='hidden{}'.format(n+1),
                                keep_prob=keep_prob, training=training)
            layers.append(hidden_n)

        readout = tf.layers.dense(hidden_n, units=n_classes, kernel_initializer=init)

    return readout

def custom_elu(x):
    return tf.nn.elu(x) + 1.0

def log_sum_exp_trick(x, axis=1):
    x_max = tf.reduce_max(x, axis=1, keep_dims=True)
    lse = x_max + tf.log(tf.reduce_sum(tf.exp(x-x_max), axis=1, keep_dims=True))
    return lse

def scope_variables(name):
    with tf.variable_scope(name):
        return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 
                       scope=tf.get_variable_scope().name)

def adversary_dense(x, z_pivot, n_layers, hidden_nodes, keep_prob, name, builder=selu_builder, reuse=False, training=True, actv=selu.selu):
    # Extensible dense model
    SELU_initializer = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')
    init = SELU_initializer if builder==selu_builder else tf.contrib.layers.xavier_initializer()
    assert n_layers == len(hidden_nodes), 'Specified layer nodes and number of layers do not correspond.'
    layers = [x]

    with tf.variable_scope('adversary', reuse=reuse):
        hidden_0 = builder(x, shape=[2, hidden_nodes[0]], name='hidden0',
                                keep_prob=keep_prob, training=training)
        layers.append(hidden_0)
        for n in range(0,n_layers-1):
            hidden_n = builder(layers[-1], shape=[hidden_nodes[n], hidden_nodes[n+1]], name='hidden{}'.format(n+1),
                                keep_prob=keep_prob, training=training)
            layers.append(hidden_n)

        fc = tf.layers.dense(hidden_n, units=96, activation=tf.nn.tanh, kernel_initializer=init)
        fc_logits, fc_mu, fc_sigma = tf.split(fc, 3, axis=1)
        logits = tf.layers.dense(fc_logits, units=config.n_gaussians, activation=tf.identity, name='mixing_fractions')
        centers = tf.layers.dense(fc_mu, units=config.n_gaussians, activation=tf.identity, name='means')
        variances = tf.layers.dense(fc_sigma, units=config.n_gaussians, activation=custom_elu, name='variances')
        mixing_coeffs = tf.nn.softmax(logits)
        
        exponent = tf.log(mixing_coeffs) - 1/2 * tf.log(2*np.pi) - tf.log(variances) - tf.square(centers-tf.expand_dims(z_pivot,1))/(2*tf.square(variances))
        # adversary_out = tf.reduce_sum(mixing_coeffs*(1/(tf.sqrt(2*np.pi*variances))*tf.exp(-tf.square(centers-tf.expand_dims(z_pivot,1)) /(2*tf.square(variances))), axis=1)
            
    return log_sum_exp_trick(exponent)    

def selu_adversary(x, z_pivot, n_layers, hidden_nodes, keep_prob, name, reuse=False, training=True, actv=selu.selu):
    SELU_initializer = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')
    xavier = tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32)

    with tf.variable_scope('adversary', reuse=reuse):
        l0 = tf.layers.dense(x, units=hidden_nodes[0], activation=actv, kernel_initializer=SELU_initializer)
        d0 = selu.dropout_selu(l0, rate=1-keep_prob, training=training)

        l1 = tf.layers.dense(d0, units=hidden_nodes[1], activation=actv, kernel_initializer=SELU_initializer)
        d1 = selu.dropout_selu(l1, rate=1-keep_prob, training=training)

        fc = tf.layers.dense(d1, units=96, activation=tf.nn.tanh, kernel_initializer=xavier)
        fc_logits, fc_mu, fc_sigma = tf.split(fc, 3, axis=1)
        logits = tf.layers.dense(fc_logits, units=config.n_gaussians, activation=tf.identity, name='mixing_fractions')
        centers = tf.layers.dense(fc_mu, units=config.n_gaussians, activation=tf.identity, name='means')
        variances = tf.layers.dense(fc_sigma, units=config.n_gaussians, activation=custom_elu, name='variances')
        mixing_coeffs = tf.nn.softmax(logits)
        
        exponent = tf.log(mixing_coeffs) - 1/2 * tf.log(2*np.pi) - tf.log(variances) - tf.square(centers-tf.expand_dims(z_pivot,1))/(2*tf.square(variances))
        # adversary_out = tf.reduce_sum(mixing_coeffs*(1/(tf.sqrt(2*np.pi*variances))*tf.exp(-tf.square(centers-tf.expand_dims(z_pivot,1)) /(2*tf.square(variances))), axis=1)
        
    return log_sum_exp_trick(exponent)

In [4]:
class vanillaDNN():
    # Builds the computational graph
    def __init__(self, config, training=True, cyclical=False):
        
        self.global_step = tf.Variable(0, trainable=False)
        self.handle = tf.placeholder(tf.string, shape=[])
        self.training_phase = tf.placeholder(tf.bool)
        self.beta = tf.placeholder(tf.float32) if cyclical else config.learning_rate
#         self.beta = tf.train.exponential_decay(config.learning_rate, self.global_step, 
#                                                decay_steps = config.steps_per_epoch, decay_rate = config.lr_epoch_decay, staircase=True)
        self.features_placeholder = tf.placeholder(features.dtype, features.shape)
        self.labels_placeholder = tf.placeholder(labels.dtype, labels.shape)
        self.auxillary_placeholder = tf.placeholder(auxillary.dtype, auxillary.shape)
        self.featuresTest_placeholder = tf.placeholder(features.dtype, featuresTest.shape)
        self.labelsTest_placeholder = tf.placeholder(labels.dtype, labelsTest.shape)
        self.auxillaryTest_placeholder = tf.placeholder(auxillary.dtype, auxillaryTest.shape)

        preTrainDataset = dataset_single_epoch(self.features_placeholder, self.labels_placeholder, self.auxillary_placeholder, config.batch_size, training=True)
        trainDataset = dataset_placeholder_aux(self.features_placeholder, self.labels_placeholder, self.auxillary_placeholder,
                                           config.batch_size, config.num_epochs, training=True)
        testDataset = dataset_placeholder_aux(self.featuresTest_placeholder, self.labelsTest_placeholder, self.auxillaryTest_placeholder,
                                          config.batch_size, config.num_epochs, training=True)
        plotDataset = dataset_placeholder_plot(self.featuresTest_placeholder, self.labelsTest_placeholder, self.auxillaryTest_placeholder)
                        
        self.iterator = tf.contrib.data.Iterator.from_string_handle(self.handle, trainDataset.output_types, trainDataset.output_shapes)
        self.pretrain_iterator = preTrainDataset.make_initializable_iterator()
        self.train_iterator = trainDataset.make_initializable_iterator()
        self.test_iterator = testDataset.make_initializable_iterator()
        self.plot_iterator = plotDataset.make_initializable_iterator()

        self.example, self.label, self.ancillary = self.iterator.get_next()
        # self.readout = dense_SELU(self.example, config.n_layers, [1024, 1024, 512, 512, 256], config.keep_prob, training=self.training_phase)
        self.readout = dense_model(self.example, config.n_layers, config.hidden_layer_nodes, config.keep_prob, builder=selu_builder, 
                                   n_input=config.nFeatures, n_classes=config.n_classes, reuse=False, training=self.training_phase, scope='predictor')
        
        # Introduce a separate adversary network for each pivot
        for i in range(len(config.pivots)):
            adversary_readout = dense_model(tf.nn.softmax(self.readout), config.adv_n_layers, config.adv_hidden_nodes, config.adv_keep_prob,
                n_input=2, n_classes=config.adv_n_classes, builder=selu_builder, training=self.training_phase, scope='adversary_{}'.format(config.pivots[i]))
            adversary_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=adversary_readout, 
                                                                                           labels=tf.cast(self.ancillary[:,i+2], tf.int32)))
            tf.add_to_collection('adversary_losses', adversary_loss)

        # Mask signal values in adversary loss
        self.adversary_loss = tf.add_n(tf.get_collection('adversary_losses'), name='total_adversary_loss')
        self.predictor_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.readout, labels=self.label))
        self.total_loss = self.predictor_loss - config.adv_lambda*self.adversary_loss
        
        theta_f = scope_variables('predictor')
        theta_r = scope_variables('adversary')
        self.theta = theta_f, theta_r

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Ensures that we execute the update_ops before performing the train_step
            predictor_optimizer = tf.train.AdamOptimizer(config.learning_rate)
            predictor_gs = tf.Variable(0, name='predictor_global_step', trainable=False)
            self.predictor_train_op = predictor_optimizer.minimize(self.predictor_loss, name='predictor_opt', global_step=predictor_gs, var_list=theta_f)
            predictor_optimize = predictor_optimizer.minimize(self.total_loss, name='predictor_opt', global_step=predictor_gs, var_list=theta_f)
            # self.joint_train_op = predictor_optimizer.minimize(self.total_loss, name='joint_opt', global_step=predictor_gs, var_list=theta_f)

            adversary_optimizer = tf.train.AdamOptimizer(config.adv_learning_rate)
            adversary_gs = tf.Variable(0, name='adversary_global_step', trainable=False)
            self.adversary_train_op = adversary_optimizer.minimize(self.adversary_loss, name='adversary_opt', global_step=adversary_gs, var_list=theta_r)

        self.ema = tf.train.ExponentialMovingAverage(decay=config.ema_decay, num_updates=predictor_gs, name='predictor_ema')
        maintain_predictor_averages_op = self.ema.apply(theta_f)
        with tf.control_dependencies([predictor_optimize]):
            self.joint_train_op = tf.group(maintain_predictor_averages_op)

        # Evaluation metrics
        self.cross_entropy = self.predictor_loss
        self.p = tf.nn.softmax(self.readout)
        self.transform = tf.log(self.p[:,1]/(1-self.p[:,1]+config.epsilon)+config.epsilon)
        correct_prediction = tf.equal(tf.cast(tf.argmax(self.readout, 1), tf.int32), self.label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        _, self.auc_op = tf.metrics.auc(predictions = tf.argmax(self.readout,1), labels = self.label, num_thresholds = 1024)
        self.pearson_dE, self.pearson_dE_op =  tf.contrib.metrics.streaming_pearson_correlation(predictions=self.transform,
                                                                                                labels=self.ancillary[:,0], name='pearson_dE')
        self.pearson_mbc, self.pearson_mbc_op =  tf.contrib.metrics.streaming_pearson_correlation(predictions=self.transform,
                                                                                                  labels=self.ancillary[:,1], name='pearson_mbc')
        self.adv_xentropy = self.adversary_loss
        adv_correct_prediction = tf.equal(tf.cast(tf.argmax(adversary_readout,1), tf.int32), tf.cast(self.ancillary[:,3], tf.int32))
        self.adv_accuracy = tf.reduce_mean(tf.cast(adv_correct_prediction, tf.float32))
        
        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('auc', self.auc_op)
        tf.summary.scalar('learning_rate', self.beta)
        tf.summary.scalar('predictor_loss', self.predictor_loss)
        tf.summary.scalar('adversary_loss', self.adversary_loss)
        tf.summary.scalar('total_loss', self.total_loss)        
        tf.summary.scalar('pearson_dE', self.pearson_dE_op)
        tf.summary.scalar('pearson_mbc', self.pearson_mbc_op)
        
        self.merge_op = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(
            os.path.join(directories.tensorboard, 'train_{}'.format(time.strftime('%d-%m_%I:%M'))), graph = tf.get_default_graph())
        self.test_writer = tf.summary.FileWriter(
            os.path.join(directories.tensorboard, 'test_{}'.format(time.strftime('%d-%m_%I:%M'))))

    def predict(self, ckpt):
        pin_cpu = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True, device_count = {'GPU':0})
        start_time = time.time()
        
        # Restore the moving average version of the learned variables for eval.
        variables_to_restore = self.ema.variables_to_restore().update(dict((v, v) for v in scope_variables('adversary')))
        
        saver = tf.train.Saver(variables_to_restore)
        # saver = tf.train.Saver()
        valDataset = dataset_placeholder_aux(self.featuresTest_placeholder, self.labelsTest_placeholder, self.auxillaryTest_placeholder,
                                          config.batch_size, config.num_epochs, training=False, shuffle=False)
        val_iterator = valDataset.make_initializable_iterator()
        concatLabels = tf.cast(self.label, tf.int32)
        concatPreds = tf.cast(tf.argmax(self.readout,1), tf.int32)
        concatOutput = self.p[:,1]

        with tf.Session(config=pin_cpu) as sess:
            # Initialize variables
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            sess.run(tf.local_variables_initializer())
            assert (ckpt.model_checkpoint_path), 'Missing checkpoint file!'    
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('{} restored.'.format(ckpt.model_checkpoint_path))
            val_handle = sess.run(val_iterator.string_handle())
            labels, preds, outputs = [], [], []
            sess.run(val_iterator.initializer, feed_dict={vDNN.featuresTest_placeholder: featuresTest,
                                                          vDNN.labelsTest_placeholder: labelsTest, 
                                                          vDNN.auxillaryTest_placeholder: auxillaryTest})
            while True:
                try:
                    l, p, o = sess.run([concatLabels, concatPreds, concatOutput], 
                                       feed_dict = {vDNN.training_phase: False, vDNN.handle: val_handle})
                    labels.append(l), preds.append(p), outputs.append(o)
                except tf.errors.OutOfRangeError:
                    labels, preds, outputs = np.concatenate(labels), np.concatenate(preds), np.concatenate(outputs)
                    break
            acc = np.mean(np.equal(labels,preds))
            print("Validation accuracy: {:.3f}".format(acc))
            
            plot_ROC_curve(network_output=outputs, y_true=labels, identifier=config.mode+config.channel,
                           meta=architecture + ' | Test accuracy: {:.3f}'.format(acc))
            delta_t = time.time() - start_time
            print("Inference complete. Duration: %g s" %(delta_t))
            
            return labels, preds, outputs

In [ ]:
def train(config, restore = False):
    # Executes training operations
    print('Architecture: {}'.format(architecture))
    vDNN = vanillaDNN(config, training=True)
    start_time = time.time()
    global_step, v_auc_best = 0, 0.
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
        # Initialize variables
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        pretrain_handle = sess.run(vDNN.pretrain_iterator.string_handle())
        train_handle = sess.run(vDNN.train_iterator.string_handle())
        test_handle = sess.run(vDNN.test_iterator.string_handle())
        plot_handle = sess.run(vDNN.plot_iterator.string_handle())
        
        if restore and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('{} restored.'.format(ckpt.model_checkpoint_path))
            
        train_init = {vDNN.features_placeholder: features, vDNN.labels_placeholder: labels, vDNN.auxillary_placeholder: auxillary}
        test_init = {vDNN.featuresTest_placeholder: featuresTest, vDNN.labelsTest_placeholder: labelsTest, vDNN.auxillaryTest_placeholder: auxillaryTest}
        pretrain_feed = {vDNN.training_phase: True, vDNN.handle: pretrain_handle}
        train_feed = {vDNN.training_phase: True, vDNN.handle: train_handle}
        sess.run(vDNN.train_iterator.initializer, feed_dict=train_init)
        sess.run(vDNN.test_iterator.initializer, feed_dict=test_init)
        sess.run(vDNN.plot_iterator.initializer, feed_dict=test_init)
        
        def pretrain(iterator, train_op, feed_dict_init, feed_dict_train, n_epochs):
            for epoch in range(n_epochs):
                sess.run(iterator.initializer, feed_dict=feed_dict_init)
                while True:
                    try:
                        sess.run(train_op, feed_dict=feed_dict_train)
                    except tf.errors.OutOfRangeError:
                        break

                plot_distributions(vDNN, epoch, sess, handle=plot_handle)
                run_diagnostics(vDNN, config, directories, sess, saver, train_handle, test_handle, global_step, 
                                config.nTrainExamples, start_time, v_auc_best, epoch)
        
#         pretrain(vDNN.pretrain_iterator, train_op=vDNN.predictor_train_op, feed_dict_init=train_init, feed_dict_train=pretrain_feed, n_epochs=5)
#         pretrain(vDNN.pretrain_iterator, train_op=vDNN.adversary_train_op, feed_dict_init=train_init, feed_dict_train=pretrain_feed, n_epochs=1)

        while True:
            try:
                if config.adversary:
                    # adversary trains in inner loop
                    if global_step % config.K == 0:
                        sess.run(vDNN.joint_train_op, feed_dict=train_feed)
                    else:
                        sess.run(vDNN.adversary_train_op, feed_dict=train_feed)
                    global_step+=1

                    if global_step % (config.steps_per_epoch) == 0:
                        epoch, v_auc_best = run_adv_diagnostics(vDNN, config, directories, sess, saver, train_handle, test_handle, 
                                                            global_step, config.nTrainExamples, start_time, v_auc_best)
                        plot_distributions(vDNN, epoch, sess, handle=plot_handle)
                else:
                    # Run X steps on training dataset
                    sess.run(vDNN.predictor_train_op, feed_dict=train_feed)
                    global_step+=1

                    if global_step % (config.steps_per_epoch // 4) == 0:
                        epoch, v_auc_best = run_diagnostics(vDNN, config, directories, sess, saver, train_handle, test_handle, 
                                                            global_step, config.nTrainExamples, start_time, v_auc_best)

            except tf.errors.OutOfRangeError:
                break

        save_path = saver.save(sess, os.path.join(directories.checkpoints, 'vDNN_{}_{}_end.ckpt'.format(config.mode, config.channel)), global_step=epoch)
    
    print("Training Complete. Model saved to file: {} Time elapsed: {:.3f} s".format(save_path, time.time()-start_time))

In [ ]:
train(config)#, restore=True)


Architecture: B2Xsy - adv-selu | Layers: 7 | Dropout: 0.95 | Base LR: 2.5e-05 | Epochs: 512
Graph saved to file: checkpoints/vDNN_adv-selu_B2Xsy_epoch0.ckpt-0
Epoch 0, Step 1321728 | Training Acc: 0.958 | Test Acc: 0.926 | Test Loss: 0.236 | Test AUC: 0.897 | Total loss: -4.260 | Rate: 263 examples/s (52.97 s) [*]
Post-selection: 606
Post-selection: 606
Epoch 1, Step 1321491 | Training Acc: 0.878 | Test Acc: 0.938 | Test Loss: 0.160 | Test AUC: 0.840 | Total loss: -4.376 | Rate: 1692 examples/s (109.05 s) 
Post-selection: 606
Post-selection: 606
Epoch 2, Step 1321254 | Training Acc: 0.920 | Test Acc: 0.961 | Test Loss: 0.100 | Test AUC: 0.900 | Total loss: -4.440 | Rate: 1813 examples/s (162.73 s) [*]
Post-selection: 606
Post-selection: 606
Epoch 3, Step 1321017 | Training Acc: 0.954 | Test Acc: 0.938 | Test Loss: 0.173 | Test AUC: 0.913 | Total loss: -4.355 | Rate: 1699 examples/s (216.59 s) [*]
Post-selection: 606
Post-selection: 606
Epoch 4, Step 1320780 | Training Acc: 0.920 | Test Acc: 0.941 | Test Loss: 0.204 | Test AUC: 0.895 | Total loss: -4.233 | Rate: 1703 examples/s (270.21 s) 
Post-selection: 606
Post-selection: 606
Epoch 5, Step 1320543 | Training Acc: 0.958 | Test Acc: 0.957 | Test Loss: 0.139 | Test AUC: 0.935 | Total loss: -4.377 | Rate: 1760 examples/s (323.81 s) [*]
Post-selection: 606
Post-selection: 606
Epoch 6, Step 1320306 | Training Acc: 0.958 | Test Acc: 0.945 | Test Loss: 0.147 | Test AUC: 0.927 | Total loss: -4.381 | Rate: 1668 examples/s (377.44 s) 
Post-selection: 606
Post-selection: 606
Graph saved to file: best/vDNN_adv-selu_B2Xsy_epoch7.ckpt-7
Epoch 7, Step 1320069 | Training Acc: 0.958 | Test Acc: 0.957 | Test Loss: 0.126 | Test AUC: 0.936 | Total loss: -4.391 | Rate: 402 examples/s (431.78 s) [*]
Post-selection: 606
Post-selection: 606
Epoch 8, Step 1319832 | Training Acc: 0.975 | Test Acc: 0.922 | Test Loss: 0.219 | Test AUC: 0.913 | Total loss: -4.347 | Rate: 1757 examples/s (485.68 s) 
Post-selection: 606
Post-selection: 606
Epoch 9, Step 1319595 | Training Acc: 0.949 | Test Acc: 0.949 | Test Loss: 0.143 | Test AUC: 0.922 | Total loss: -4.367 | Rate: 1723 examples/s (539.55 s) 
Post-selection: 606
Post-selection: 606
Epoch 10, Step 1319358 | Training Acc: 0.916 | Test Acc: 0.945 | Test Loss: 0.186 | Test AUC: 0.906 | Total loss: -4.260 | Rate: 1717 examples/s (594.01 s) 
Post-selection: 606
Post-selection: 606
Epoch 11, Step 1319121 | Training Acc: 0.945 | Test Acc: 0.918 | Test Loss: 0.228 | Test AUC: 0.889 | Total loss: -4.263 | Rate: 1692 examples/s (647.94 s) 
Post-selection: 606
Post-selection: 606
Epoch 12, Step 1318884 | Training Acc: 0.937 | Test Acc: 0.926 | Test Loss: 0.185 | Test AUC: 0.907 | Total loss: -4.400 | Rate: 1737 examples/s (701.57 s) 
Post-selection: 606
Post-selection: 606
Epoch 13, Step 1318647 | Training Acc: 0.937 | Test Acc: 0.961 | Test Loss: 0.121 | Test AUC: 0.922 | Total loss: -4.398 | Rate: 1650 examples/s (755.41 s) 
Post-selection: 606
Post-selection: 606
Epoch 14, Step 1318410 | Training Acc: 0.962 | Test Acc: 0.938 | Test Loss: 0.165 | Test AUC: 0.934 | Total loss: -4.339 | Rate: 1766 examples/s (809.67 s) 
Post-selection: 606
Post-selection: 606
Epoch 15, Step 1318173 | Training Acc: 0.937 | Test Acc: 0.961 | Test Loss: 0.147 | Test AUC: 0.919 | Total loss: -4.361 | Rate: 1703 examples/s (863.41 s) 
Post-selection: 606
Post-selection: 606
Graph saved to file: checkpoints/vDNN_adv-selu_B2Xsy_epoch16.ckpt-16
Epoch 16, Step 1317936 | Training Acc: 0.941 | Test Acc: 0.938 | Test Loss: 0.213 | Test AUC: 0.900 | Total loss: -4.354 | Rate: 348 examples/s (918.65 s) 
Post-selection: 606
Post-selection: 606
Epoch 17, Step 1317699 | Training Acc: 0.945 | Test Acc: 0.945 | Test Loss: 0.162 | Test AUC: 0.928 | Total loss: -4.328 | Rate: 1727 examples/s (972.29 s) 
Post-selection: 606
Post-selection: 606
Epoch 18, Step 1317462 | Training Acc: 0.949 | Test Acc: 0.941 | Test Loss: 0.138 | Test AUC: 0.920 | Total loss: -4.406 | Rate: 1763 examples/s (1026.11 s) 
Post-selection: 606
Post-selection: 606
Epoch 19, Step 1317225 | Training Acc: 0.949 | Test Acc: 0.961 | Test Loss: 0.133 | Test AUC: 0.936 | Total loss: -4.419 | Rate: 1781 examples/s (1079.83 s) 
Post-selection: 606
Post-selection: 606

Making Predictions

Classification on a new instance is given by the softmax of the output of the final readout layer.


In [6]:
import os
ckpt = tf.train.get_checkpoint_state('best')
vDNN = vanillaDNN(config, training = False)
labels, preds, output = vDNN.predict(ckpt)

# Add predictions to test set as a new column, save as HDF5
output = pd.Series(output, name='preds')
test = pd.concat([pdf_test, output], axis=1)


INFO:tensorflow:Restoring parameters from best/vDNN_adv-selu_B2Xsy_epoch81.ckpt-81
best/vDNN_adv-selu_B2Xsy_epoch81.ckpt-81 restored.
Validation accuracy: 0.952
AUC: 0.9854177264443642
Plotting signal efficiency versus background rejection
<matplotlib.figure.Figure at 0x2b88c6f3dba8>
Inference complete. Duration: 18.8827 s

In [11]:
bkg = test[test['labels']<0.5]
sig = test[test['labels']==1]

In [16]:
post_cut = bkg[bkg['probs']>0.9]
post_sig = sig[sig['probs']>0.9]

In [19]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.distplot(sig['probs'], kde=False)
plt.show()



In [14]:
test.to_hdf(os.path.join(directories.checkpoints, 'adv_both{}_preds.h5'.format(os.path.basename(directories.test))), key = 'df', format='t', data_columns=True)

In [11]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
plt.style.use('seaborn-darkgrid')
plt.style.use('seaborn-talk')
plt.style.use('seaborn-pastel')

In [8]:
def normPlot(variable, pdf, epoch, signal, nbins=50, bkg_rejection=0.95):
    from scipy import stats
    titles={'mbc': r'$M_{bc}$ (GeV)', 'deltae': r'$\Delta E$ (GeV)', 'daughterInvM': r'$M_{X_q}$ (GeV)'}
    bkg = pdf[pdf['labels']<0.5]
    post_bkg = bkg.nlargest(int(bkg.shape[0]*(1-bkg_rejection)), columns=['preds'])
    threshold = post_bkg['preds'].min()
    print('Post-selection:', post_bkg.shape[0])
    if signal:
        sig = pdf[pdf['labels']==1]
        post_sig = pdf[(pdf['labels']==1) & (pdf['preds']>threshold)]
        sns.distplot(post_sig[variable], hist=True, kde=True, label='Signal - {} BG rejection'.format(bkg_rejection), bins=nbins,
                     hist_kws=dict(edgecolor="0.85", linewidth=0.5, alpha = 0.8))
        sns.distplot(sig[variable], hist=True, kde=True, label='Signal', bins=nbins,
                     hist_kws=dict(edgecolor="0.85", linewidth=0.5, alpha = 0.8))
    else:
        sns.distplot(post_bkg[variable], hist=True, kde=False, norm_hist=True, label='Background - {} BG rejection'.format(bkg_rejection), bins=nbins,# fit=stats.norm,
                     hist_kws=dict(edgecolor="0.85", linewidth=0.5, alpha = 0.8))
        sns.distplot(bkg[variable], hist=True, kde=True, label='Background', bins=nbins,
                     hist_kws=dict(edgecolor="0.85", linewidth=0.5, alpha = 0.8))
    plt.xlabel(r'{}'.format(titles[variable]))
    plt.ylabel(r'Normalized events/bin')
    plt.legend(loc = "best")
    plt.savefig('best/{}_adv-ep{}-nb.pdf'.format(variable, epoch), bbox_inches='tight',format='pdf', dpi=1000)
    plt.show()
    plt.gcf().clear()

In [13]:
normPlot('deltae', test, signal=False, epoch='talk_bkg_mbc_adv-nn')


Post-selection: 6064

In [15]:
normPlot('mbc', test, signal=False, epoch='talk_bkg_mbc_adv-nn')


Post-selection: 6064

In [ ]: