Adversarial training for robustness against systematic error.

import tensorflow as tf
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import glob, time, os
import selu
from diagnostics import *

class config(object):
    """Hyper-parameters shared by the predictor network, its adversaries and the training loop."""

    # --- Run identification -------------------------------------------------
    mode = 'adv-selu'
    channel = 'B2Xsy'
    builder = 'selu'

    # --- Predictor (classifier) network -------------------------------------
    # One entry per hidden layer; dense_model asserts len() == n_layers.
    hidden_layer_nodes = [1024, 1024, 512, 512, 512, 256, 256]
    n_layers = 7
    n_classes = 2
    keep_prob = 0.95
    learning_rate = 1e-5
    # Decay of the exponential moving average kept over the predictor weights.
    ema_decay = 0.999

    # --- Adversary networks (one per pivot variable) -------------------------
    adversary = True
    pivots = ['deltae', 'mbc']
    adv_hidden_nodes = [256, 512, 512, 256]
    adv_n_layers = 4
    # Number of quantile bins each pivot is discretised into (adversary targets).
    adv_n_classes = 10
    adv_keep_prob = 1.0
    adv_learning_rate = 0.001
    # Weight of the adversary loss subtracted from the predictor loss.
    adv_lambda = 8
    # Period (in steps) used by the training loop's `global_step % K` test.
    K = 4

    # --- Optimisation schedule ------------------------------------------------
    num_epochs = 512
    batch_size = 256
    cycles = 3  # Number of annealing cycles
    # Small constant guarding the log-odds transform against log(0) / division by zero.
    epsilon = 1e-8

class directories(object):
    """Filesystem locations for input data, TensorBoard logs and checkpoints."""

    # Output locations (relative to the working directory).
    tensorboard = 'tensorboard'
    checkpoints = 'checkpoints'
    best_checkpoints = 'best'

    # Input parquet datasets.
    # NOTE(review): `train` points at the *_val file and `val` at the *_train
    # file - confirm whether this swap is intentional.
    train = '/data/projects/punim0011/jtan/spark/spark2tf/cont_b2sy_By_val.parquet'
    test = '/data/projects/punim0011/jtan/spark/spark2tf/cont_b2sy_By_test.parquet'
    val = '/data/projects/punim0011/jtan/spark/spark2tf/cont_b2sy_By_train.parquet'

# One-line run summary; printed when training starts and passed as plot metadata.
# The leading field is the decay channel (the recorded run prints "B2Xsy - adv-selu | ...").
architecture = '{} - {} | Layers: {} | Dropout: {} | Base LR: {} | Epochs: {}'.format(
    config.channel, config.mode, config.n_layers, config.keep_prob, config.learning_rate, config.num_epochs)

def load_parquet(datasetName, subsample=False, dropFrac=0.75):
    """Read a parquet dataset and split it into network inputs.

    The rows are shuffled once after loading. The two pivot variables
    ('deltae', 'mbc') are additionally discretised into
    ``config.adv_n_classes`` quantile bins, which serve as the adversaries'
    classification targets.

    Args:
        datasetName: path handed to ``pyarrow.parquet.ParquetDataset``.
        subsample: currently unused; kept for call compatibility.
        dropFrac: currently unused; kept for call compatibility.

    Returns:
        Tuple ``(features, labels, auxillary, pdf)``:
          * features - float32 array of all columns not in ``excludeFeatures``;
          * labels - int32 array taken from the 'labels' column;
          * auxillary - float32 array with columns
            [deltae, mbc, dE_labels, mbc_labels];
          * pdf - the shuffled pandas DataFrame the arrays were derived from.
    """
    excludeFeatures = ['labels', 'mbc', 'deltae', 'daughterInvM', 'nCands', 'evtNum', 'MCtype', 'channel']
    dataset = pq.ParquetDataset(datasetName)
    # Materialise the whole dataset as a pandas DataFrame.
    pdf = dataset.read().to_pandas()
    # Shuffle rows and rebuild a clean 0..N-1 index so positions line up with the arrays below.
    pdf = pdf.sample(frac=1).reset_index(drop=True)
    features = pdf.drop(excludeFeatures, axis=1)
    labels = pdf['labels'].astype(np.int32)
    auxillary = pdf[['deltae', 'mbc']]
    # Quantile-bin each pivot; labels=False yields integer bin indices.
    auxillary = auxillary.assign(
        dE_labels=pd.qcut(auxillary['deltae'], q=config.adv_n_classes, labels=False),
        mbc_labels=pd.qcut(auxillary['mbc'], q=config.adv_n_classes, labels=False))
    return features.values.astype(np.float32), labels.values, auxillary.values.astype(np.float32), pdf

Available GPUs:
['/device:GPU:0', '/device:GPU:1', '/device:GPU:2', '/device:GPU:3']

# Load the training and test sets once at module level; the graph builder reads
# these arrays to size its placeholders.
features, labels, auxillary, pdf = load_parquet(directories.train)
featuresTest, labelsTest, auxillaryTest, pdf_test = load_parquet(directories.test)

# Record dataset-dependent sizes on the shared config object.
config.nTrainExamples = features.shape[0]
config.nFeatures = features.shape[-1]
config.steps_per_epoch = config.nTrainExamples // config.batch_size

def dataset_placeholder(features_placeholder, labels_placeholder, batchSize, numEpochs, training=True):
    """Build a shuffled, batched dataset of (features, label) pairs.

    Args:
        features_placeholder: tensor/placeholder holding all feature rows.
        labels_placeholder: tensor/placeholder holding the matching labels.
        batchSize: number of examples per batch.
        numEpochs: number of passes over the data when ``training`` is True.
        training: if True the dataset is repeated ``numEpochs`` times,
            otherwise it yields a single pass.

    Returns:
        The resulting dataset object.
    """
    # NOTE(review): the constructor call was truncated in this file; restored with the
    # TF >= 1.4 name. On TF 1.3 use tf.contrib.data.Dataset instead.
    dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
    dataset = dataset.shuffle(buffer_size=16384)
    dataset = dataset.batch(batchSize)
    if training:
        dataset = dataset.repeat(numEpochs)

    return dataset

def dataset_placeholder_aux(features_placeholder, labels_placeholder, auxillary_placeholder, batchSize, numEpochs, training=True, shuffle=True):
    """Build a batched dataset of (features, label, auxillary) triples.

    Args:
        features_placeholder: tensor/placeholder holding all feature rows.
        labels_placeholder: tensor/placeholder holding the matching labels.
        auxillary_placeholder: tensor/placeholder holding the matching
            auxillary (pivot) rows.
        batchSize: number of examples per batch.
        numEpochs: number of passes over the data when ``training`` is True.
        training: if True the dataset is repeated ``numEpochs`` times,
            otherwise it yields a single pass.
        shuffle: if True, shuffle with a 16384-element buffer; pass False to
            keep the input row order (e.g. to align predictions with rows).

    Returns:
        The resulting dataset object.
    """
    # NOTE(review): the constructor call was truncated in this file; restored with the
    # TF >= 1.4 name. On TF 1.3 use tf.contrib.data.Dataset instead.
    dataset = tf.data.Dataset.from_tensor_slices(
        (features_placeholder, labels_placeholder, auxillary_placeholder))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=16384)
    dataset = dataset.batch(batchSize)
    if training:
        dataset = dataset.repeat(numEpochs)

    return dataset

def dataset_placeholder_plot(features_placeholder, labels_placeholder, auxillary_placeholder, batchSize=51200, training=False):
    """Build an endlessly repeating, unshuffled dataset of large batches for plotting.

    Each batch holds one eighth of the rows of ``features_placeholder`` (whose
    leading dimension must therefore be statically known).

    Args:
        features_placeholder: tensor/placeholder holding all feature rows.
        labels_placeholder: tensor/placeholder holding the matching labels.
        auxillary_placeholder: tensor/placeholder holding the matching
            auxillary (pivot) rows.
        batchSize: currently unused (the batch size is derived from the
            number of rows); kept for call compatibility.
        training: currently unused; kept for call compatibility.

    Returns:
        The resulting dataset object.
    """
    # NOTE(review): the constructor call was truncated in this file; restored with the
    # TF >= 1.4 name. On TF 1.3 use tf.contrib.data.Dataset instead.
    dataset = tf.data.Dataset.from_tensor_slices(
        (features_placeholder, labels_placeholder, auxillary_placeholder))
    n_rows = int(features_placeholder.get_shape()[0])
    # A batch size of 0 is invalid, so clamp for inputs with fewer than 8 rows.
    dataset = dataset.batch(max(1, n_rows // 8))
    dataset = dataset.repeat()

    return dataset

def dataset_single_epoch(features, labels, auxillary, batchSize, training=True):
    """Build a shuffled, batched dataset that yields exactly one pass over the data.

    Args:
        features: tensor/placeholder holding all feature rows.
        labels: tensor/placeholder holding the matching labels.
        auxillary: tensor/placeholder holding the matching auxillary rows.
        batchSize: number of examples per batch.
        training: currently unused; kept for call compatibility.

    Returns:
        The resulting dataset object (not repeated).
    """
    # NOTE(review): the constructor call was truncated in this file; restored with the
    # TF >= 1.4 name. On TF 1.3 use tf.contrib.data.Dataset instead.
    dataset = tf.data.Dataset.from_tensor_slices((features, labels, auxillary))
    dataset = dataset.shuffle(buffer_size=25600)
    dataset = dataset.batch(batchSize)
    return dataset

def dataset_train(dataDirectory, batchSize, numEpochs, nFeatures, training=True):
    """Build a shuffled, batched dataset from TFRecord shards.

    Args:
        dataDirectory: directory searched for shard files matching ``part*``.
        batchSize: number of examples per batch.
        numEpochs: number of passes over the data when ``training`` is True.
        nFeatures: length of the fixed-size 'features' vector in each record.
        training: if True the dataset is repeated ``numEpochs`` times,
            otherwise it yields a single pass.

    Returns:
        Dataset of ``(features, label)`` pairs, with the label cast to int32.
    """
    filenames = glob.glob('{}/part*'.format(dataDirectory))
    # NOTE(review): the two dataset assignments in this function were truncated in this
    # file; restored as the usual TFRecord pipeline (TF >= 1.4 names).
    dataset = tf.data.TFRecordDataset(filenames)

    # Extract data from `tf.Example` protocol buffer
    def parser(record):
        keys_to_features = {
            "features": tf.FixedLenFeature([nFeatures], tf.float32),
            # Records without a label fall back to 0.0.
            "labels": tf.FixedLenFeature((), tf.float32,
                                         default_value=tf.zeros([], dtype=tf.float32)),
        }
        parsed = tf.parse_single_example(record, keys_to_features)
        label = tf.cast(parsed['labels'], tf.int32)

        return parsed['features'], label

    # Transform into feature, label tensor pair
    dataset = dataset.map(parser)
    dataset = dataset.shuffle(buffer_size=16384)
    dataset = dataset.batch(batchSize)
    if training:
        dataset = dataset.repeat(numEpochs)

    return dataset

def dense_builder(x, shape, name, keep_prob, training=True, actv=tf.nn.elu):
    """Dense layer followed by batch (re)normalisation and dropout.

    Args:
        x: input tensor.
        shape: ``[n_in, n_out]``; only ``shape[1]`` (the number of units) is
            used here - the parameter mirrors ``selu_builder``'s signature.
        name: variable scope for the layer.
        keep_prob: probability of keeping a unit; dropout rate is
            ``1 - keep_prob``.
        training: Python bool or boolean tensor; switches batch norm and
            dropout between training and inference behaviour.
        actv: activation applied by the dense layer.

    Returns:
        The layer's output tensor.
    """
    # The scope initializer was previously an undefined name (`init`); use the
    # Xavier initializer that dense_model selects for non-SELU builders.
    init = tf.contrib.layers.xavier_initializer()
    bn_kwargs = {'center': True, 'scale': True, 'training': training, 'fused': True, 'renorm': True}

    with tf.variable_scope(name, initializer=init):
        layer = tf.layers.dense(x, units=shape[1], activation=actv)
        bn = tf.layers.batch_normalization(layer, **bn_kwargs)
        layer_out = tf.layers.dropout(bn, 1-keep_prob, training=training)

    return layer_out

def selu_builder(x, shape, name, keep_prob, training=True):
    """Fully connected SELU layer with SELU-compatible dropout.

    Args:
        x: input tensor, multiplied on the right by a ``shape`` weight matrix.
        shape: ``[n_in, n_out]`` of the weight matrix.
        name: variable scope holding the layer's "weights" and "biases".
        keep_prob: probability of keeping a unit; dropout rate is
            ``1 - keep_prob``.
        training: forwarded to ``selu.dropout_selu``.

    Returns:
        The layer's output tensor.
    """
    n_out = shape[1]
    weight_init = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')
    bias_init = tf.random_normal_initializer(stddev=0.01)

    with tf.variable_scope(name):
        weights = tf.get_variable("weights", shape=shape, initializer=weight_init)
        biases = tf.get_variable("biases", shape=[n_out], initializer=bias_init)
        pre_activation = tf.add(tf.matmul(x, weights), biases)
        activated = selu.selu(pre_activation)
        return selu.dropout_selu(activated, rate=1-keep_prob, training=training)

def dense_model(x, n_layers, hidden_layer_nodes, keep_prob, n_input, n_classes, scope, builder=selu_builder, reuse=False, training=True):
    """Stack ``n_layers`` hidden layers and a linear readout.

    Hidden layer ``k`` lives in variable scope ``<scope>/hidden<k>`` and is fed
    the output of layer ``k-1`` (layer 0 is fed ``x``). Previously every layer
    after the first was built on ``x`` again because layer outputs were never
    recorded, and a single-layer model failed on an undefined name.

    Args:
        x: input tensor with ``n_input`` features.
        n_layers: number of hidden layers; must equal
            ``len(hidden_layer_nodes)``.
        hidden_layer_nodes: number of units in each hidden layer.
        keep_prob: keep probability forwarded to ``builder``.
        n_input: number of input features.
        n_classes: number of readout units (logits).
        scope: enclosing variable scope.
        builder: callable ``(x, shape=, name=, keep_prob=, training=)`` that
            builds one hidden layer.
        reuse: forwarded to ``tf.variable_scope``.
        training: forwarded to ``builder``.

    Returns:
        The readout (logits) tensor.
    """
    # Extensible dense model
    SELU_initializer = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')
    init = SELU_initializer if builder==selu_builder else tf.contrib.layers.xavier_initializer()
    assert n_layers == len(hidden_layer_nodes), 'Specified layer nodes and number of layers do not correspond.'

    # layer_sizes[k] -> layer_sizes[k+1] is the weight shape of hidden layer k.
    layer_sizes = [n_input] + list(hidden_layer_nodes)
    activation = x

    with tf.variable_scope(scope, reuse=reuse):
        for n in range(n_layers):
            activation = builder(activation, shape=[layer_sizes[n], layer_sizes[n+1]], name='hidden{}'.format(n),
                                 keep_prob=keep_prob, training=training)

        readout = tf.layers.dense(activation, units=n_classes, kernel_initializer=init)

    return readout

def scope_variables(name):
    """Return the global variables whose names fall under variable scope ``name``.

    The scope is entered first so that its fully qualified name is used for
    the lookup.

    Note: ``tf.get_collection`` filters by prefix match, so 'adversary' also
    selects variables under e.g. 'adversary_deltae_background'.
    """
    with tf.variable_scope(name):
        # NOTE(review): the second argument was truncated in this file; restored as the
        # standard "variables in the current scope" idiom.
        return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=tf.get_variable_scope().name)

class vanillaDNN():
    """Graph for a dense classifier ('predictor') trained against per-pivot adversaries.

    Constructing an instance adds to the default graph: the input pipelines,
    the predictor network, one adversary network per entry of
    ``config.pivots``, the losses, the training ops, evaluation metrics and
    TensorBoard summaries. Placeholder dtypes/shapes are taken from the
    module-level arrays ``features``, ``labels``, ``auxillary`` and their
    ``*Test`` counterparts.

    NOTE(review): several statements of this class were truncated in this file
    (iterator construction, the EMA group op, the ``sess.run`` calls and the
    ``try``/``break`` of the validation loop). They are restored here with the
    standard TF 1.x (>= 1.4) calls; confirm against the original notebook.
    """

    # Builds the computational graph
    def __init__(self, config, training=True, cyclical=False):
        """Build the full training/evaluation graph.

        Args:
            config: object exposing the hyper-parameters read below.
            training: currently unused; kept for call compatibility.
            cyclical: if True ``self.beta`` is a float placeholder, otherwise
                it is the constant ``config.learning_rate``. ``self.beta`` is
                only logged as a summary; the optimizers read the learning
                rates from ``config`` directly.
        """
        self.global_step = tf.Variable(0, trainable=False)
        # String handle selecting which concrete iterator feeds the graph.
        self.handle = tf.placeholder(tf.string, shape=[])
        self.training_phase = tf.placeholder(tf.bool)
        self.beta = tf.placeholder(tf.float32) if cyclical else config.learning_rate

        self.features_placeholder = tf.placeholder(features.dtype, features.shape)
        self.labels_placeholder = tf.placeholder(labels.dtype, labels.shape)
        self.auxillary_placeholder = tf.placeholder(auxillary.dtype, auxillary.shape)
        # Test placeholders take their dtype from the test arrays themselves.
        self.featuresTest_placeholder = tf.placeholder(featuresTest.dtype, featuresTest.shape)
        self.labelsTest_placeholder = tf.placeholder(labelsTest.dtype, labelsTest.shape)
        self.auxillaryTest_placeholder = tf.placeholder(auxillaryTest.dtype, auxillaryTest.shape)

        preTrainDataset = dataset_single_epoch(self.features_placeholder, self.labels_placeholder, self.auxillary_placeholder, config.batch_size, training=True)
        trainDataset = dataset_placeholder_aux(self.features_placeholder, self.labels_placeholder, self.auxillary_placeholder,
                                           config.batch_size, config.num_epochs, training=True)
        testDataset = dataset_placeholder_aux(self.featuresTest_placeholder, self.labelsTest_placeholder, self.auxillaryTest_placeholder,
                                          config.batch_size, config.num_epochs, training=True)
        plotDataset = dataset_placeholder_plot(self.featuresTest_placeholder, self.labelsTest_placeholder, self.auxillaryTest_placeholder)
        # Feedable iterator: the dataset actually consumed is chosen at run time via self.handle.
        self.iterator = tf.data.Iterator.from_string_handle(self.handle, trainDataset.output_types, trainDataset.output_shapes)
        self.pretrain_iterator = preTrainDataset.make_initializable_iterator()
        self.train_iterator = trainDataset.make_initializable_iterator()
        self.test_iterator = testDataset.make_initializable_iterator()
        self.plot_iterator = plotDataset.make_initializable_iterator()

        self.example, self.label, self.ancillary = self.iterator.get_next()
        self.readout = dense_model(self.example, config.n_layers, config.hidden_layer_nodes, config.keep_prob, builder=selu_builder,
                                   n_input=config.nFeatures, n_classes=config.n_classes, reuse=False, training=self.training_phase, scope='predictor')

        # Introduce a separate adversary network for each pivot. Each adversary sees only
        # the predictor's softmax output and tries to classify the pivot's quantile bin.
        mode = 'background'
        # Adversary loss is masked so that only label == 0 (background) events contribute.
        background_mask = tf.cast((1-self.label), tf.float32)
        for i, pivot in enumerate(config.pivots):
            print('Building {} adversary for {} events.'.format(pivot, mode))
            adversary_readout = dense_model(tf.nn.softmax(self.readout), config.adv_n_layers, config.adv_hidden_nodes, config.adv_keep_prob, n_input=config.n_classes,
                    n_classes=config.adv_n_classes, training=self.training_phase, scope='adversary_{}_{}'.format(pivot, mode))
            # Columns 0-1 of the auxillary tensor are the raw pivots, columns 2-3 their bin labels.
            adversary_labels = tf.cast(self.ancillary[:,i+2], tf.int32)
            adversary_loss = tf.reduce_mean(background_mask*tf.nn.sparse_softmax_cross_entropy_with_logits(logits=adversary_readout,
                                                                                                          labels=adversary_labels))
            tf.add_to_collection('adversary_losses', adversary_loss)

        self.adversary_loss = tf.add_n(tf.get_collection('adversary_losses'), name='total_adversary_loss')
        self.predictor_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.readout, labels=self.label))
        # The predictor is rewarded for making the adversaries' task harder.
        self.total_loss = self.predictor_loss - config.adv_lambda*self.adversary_loss
        theta_f = scope_variables('predictor')
        theta_r = scope_variables('adversary')
        self.theta = theta_f, theta_r

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Ensures that we execute the update_ops before performing the train_step
            predictor_optimizer = tf.train.AdamOptimizer(config.learning_rate)
            predictor_gs = tf.Variable(0, name='predictor_global_step', trainable=False)
            # Classification-only update of the predictor weights.
            self.predictor_train_op = predictor_optimizer.minimize(self.predictor_loss, name='predictor_opt', global_step=predictor_gs, var_list=theta_f)
            # Adversarially regularised update of the predictor weights.
            predictor_optimize = predictor_optimizer.minimize(self.total_loss, name='predictor_opt', global_step=predictor_gs, var_list=theta_f)

            adversary_optimizer = tf.train.AdamOptimizer(config.adv_learning_rate)
            adversary_gs = tf.Variable(0, name='adversary_global_step', trainable=False)
            self.adversary_train_op = adversary_optimizer.minimize(self.adversary_loss, name='adversary_opt', global_step=adversary_gs, var_list=theta_r)

        self.ema = tf.train.ExponentialMovingAverage(decay=config.ema_decay, num_updates=predictor_gs, name='predictor_ema')
        maintain_predictor_averages_op = self.ema.apply(theta_f)
        with tf.control_dependencies([predictor_optimize]):
            # Running joint_train_op performs the regularised predictor step and then
            # refreshes the moving averages of the predictor weights.
            self.joint_train_op = tf.group(maintain_predictor_averages_op)

        # Evaluation metrics
        self.cross_entropy = self.predictor_loss
        self.p = tf.nn.softmax(self.readout)
        # Log-odds of the class-1 probability; epsilon guards against division by zero and log(0).
        self.transform = tf.log(self.p[:,1]/(1-self.p[:,1]+config.epsilon)+config.epsilon)
        correct_prediction = tf.equal(tf.cast(tf.argmax(self.readout, 1), tf.int32), self.label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        # AUC is computed from the class-1 probability; feeding hard argmax labels (as
        # before) collapses the ROC curve to a single operating point.
        _, self.auc_op = tf.metrics.auc(predictions = self.p[:,1], labels = self.label, num_thresholds = 1024)
        self.pearson_dE, self.pearson_dE_op =  tf.contrib.metrics.streaming_pearson_correlation(predictions=self.transform,
                                                                                                labels=self.ancillary[:,0], name='pearson_dE')
        self.pearson_mbc, self.pearson_mbc_op =  tf.contrib.metrics.streaming_pearson_correlation(predictions=self.transform,
                                                                                                  labels=self.ancillary[:,1], name='pearson_mbc')
        self.adv_xentropy = self.adversary_loss
        # Adversary accuracy is reported for the last pivot's adversary only.
        adv_correct_prediction = tf.equal(tf.cast(tf.argmax(adversary_readout,1), tf.int32), adversary_labels)
        self.adv_accuracy = tf.reduce_mean(tf.cast(adv_correct_prediction, tf.float32))

        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('auc', self.auc_op)
        tf.summary.scalar('learning_rate', self.beta)
        tf.summary.scalar('predictor_loss', self.predictor_loss)
        tf.summary.scalar('adversary_loss', self.adversary_loss)
        tf.summary.scalar('total_loss', self.total_loss)
        tf.summary.scalar('pearson_dE', self.pearson_dE_op)
        tf.summary.scalar('pearson_mbc', self.pearson_mbc_op)
        self.merge_op = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(
            os.path.join(directories.tensorboard, 'train_{}'.format(time.strftime('%d-%m_%I:%M'))), graph = tf.get_default_graph())
        self.test_writer = tf.summary.FileWriter(
            os.path.join(directories.tensorboard, 'test_{}'.format(time.strftime('%d-%m_%I:%M'))))

    def predict(self, ckpt):
        """Run inference over the module-level test set on CPU.

        Args:
            ckpt: checkpoint state as returned by
                ``tf.train.get_checkpoint_state``; its
                ``model_checkpoint_path`` is restored.

        Returns:
            Tuple ``(labels, preds, outputs)`` of concatenated numpy arrays:
            true labels, argmax predictions and class-1 probabilities, in the
            row order of the test arrays (the dataset is not shuffled).

        Raises:
            AssertionError: if no checkpoint path is available.
        """
        pin_cpu = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True, device_count = {'GPU':0})
        start_time = time.time()
        # Restore the moving average version of the learned variables for eval.
        # Only the predictor weights have moving averages; every other variable is
        # restored under its own name. (The previous `dict.update(...)` chain returned
        # None, which made the Saver silently fall back to the raw weights.)
        variables_to_restore = self.ema.variables_to_restore(moving_avg_variables=self.theta[0])
        saver = tf.train.Saver(variables_to_restore)
        valDataset = dataset_placeholder_aux(self.featuresTest_placeholder, self.labelsTest_placeholder, self.auxillaryTest_placeholder,
                                          config.batch_size, config.num_epochs, training=False, shuffle=False)
        val_iterator = valDataset.make_initializable_iterator()
        concatLabels = tf.cast(self.label, tf.int32)
        concatPreds = tf.cast(tf.argmax(self.readout,1), tf.int32)
        concatOutput = self.p[:,1]

        with tf.Session(config=pin_cpu) as sess:
            assert ckpt is not None and ckpt.model_checkpoint_path, 'Missing checkpoint file!'
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('{} restored.'.format(ckpt.model_checkpoint_path))
            val_handle = sess.run(val_iterator.string_handle())
            true_labels, pred_labels, scores = [], [], []
            sess.run(val_iterator.initializer, feed_dict={self.featuresTest_placeholder: featuresTest,
                                                          self.labelsTest_placeholder: labelsTest,
                                                          self.auxillaryTest_placeholder: auxillaryTest})
            feed = {self.training_phase: False, self.handle: val_handle}
            # Drain the single-pass validation iterator batch by batch.
            while True:
                try:
                    l, p, o = sess.run([concatLabels, concatPreds, concatOutput], feed_dict=feed)
                except tf.errors.OutOfRangeError:
                    break
                true_labels.append(l)
                pred_labels.append(p)
                scores.append(o)
            true_labels = np.concatenate(true_labels)
            pred_labels = np.concatenate(pred_labels)
            scores = np.concatenate(scores)

            acc = np.mean(np.equal(true_labels, pred_labels))
            print("Validation accuracy: {:.3f}".format(acc))
            # NOTE(review): one argument of this call was lost in this file; `identifier`
            # is a best guess - confirm against diagnostics.plot_ROC_curve's signature.
            plot_ROC_curve(network_output=scores, y_true=true_labels, identifier=config.mode+config.channel,
                           meta=architecture + ' | Test accuracy: {:.3f}'.format(acc))
            delta_t = time.time() - start_time
            print("Inference complete. Duration: %g s" %(delta_t))
            return true_labels, pred_labels, scores

def train(config, restore = False):
    """Build the graph and run the (adversarial) training loop.

    Training runs until the training iterator is exhausted, then a final
    checkpoint is written to ``directories.checkpoints``.

    NOTE(review): every ``sess.run(...)`` call, the ``try:``/``else:``/``break``
    lines and the ``saver.save`` call were truncated in this file. They are
    reconstructed below from the surviving fragments (feed dicts, indentation,
    the three training ops defined on the model). In particular, the exact
    assignment of training ops to branches and the position of the
    ``global_step`` increment should be confirmed against the original
    notebook.

    Args:
        config: hyper-parameter object; also passed to the graph builder and
            the diagnostics helpers.
        restore: if True and a checkpoint exists in
            ``directories.checkpoints``, start from its weights.
    """
    # Executes training operations
    print('Architecture: {}'.format(architecture))
    vDNN = vanillaDNN(config, training=True)
    start_time = time.time()
    # `epoch` is initialised so the final save works even if no diagnostics ran.
    global_step, v_auc_best, epoch = 0, 0., 0
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
        # Initialize variables; local variables back the streaming metrics (AUC, Pearson).
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        pretrain_handle = sess.run(vDNN.pretrain_iterator.string_handle())
        train_handle = sess.run(vDNN.train_iterator.string_handle())
        test_handle = sess.run(vDNN.test_iterator.string_handle())
        plot_handle = sess.run(vDNN.plot_iterator.string_handle())

        # get_checkpoint_state returns None when the directory holds no checkpoint.
        if restore and ckpt is not None and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('{} restored.'.format(ckpt.model_checkpoint_path))

        train_init = {vDNN.features_placeholder: features, vDNN.labels_placeholder: labels, vDNN.auxillary_placeholder: auxillary}
        test_init = {vDNN.featuresTest_placeholder: featuresTest, vDNN.labelsTest_placeholder: labelsTest, vDNN.auxillaryTest_placeholder: auxillaryTest}
        pretrain_feed = {vDNN.training_phase: True, vDNN.handle: pretrain_handle}
        train_feed = {vDNN.training_phase: True, vDNN.handle: train_handle}
        sess.run(vDNN.train_iterator.initializer, feed_dict=train_init)
        sess.run(vDNN.test_iterator.initializer, feed_dict=test_init)
        sess.run(vDNN.plot_iterator.initializer, feed_dict=test_init)

        def pretrain(iterator, train_op, feed_dict_init, feed_dict_train, n_epochs):
            """Run `train_op` for `n_epochs` single passes over `iterator`, with diagnostics after each pass."""
            for epoch in range(n_epochs):
                sess.run(iterator.initializer, feed_dict=feed_dict_init)
                while True:
                    try:
                        sess.run(train_op, feed_dict=feed_dict_train)
                    except tf.errors.OutOfRangeError:
                        break

                plot_distributions(vDNN, epoch, sess, handle=plot_handle)
                run_diagnostics(vDNN, config, directories, sess, saver, train_handle, test_handle, global_step,
                                config.nTrainExamples, start_time, v_auc_best, epoch)

        # Optional pre-training of predictor / adversary (disabled):
        #   pretrain(vDNN.pretrain_iterator, train_op=vDNN.predictor_train_op, feed_dict_init=train_init, feed_dict_train=pretrain_feed, n_epochs=5)
        #   pretrain(vDNN.pretrain_iterator, train_op=vDNN.adversary_train_op, feed_dict_init=train_init, feed_dict_train=pretrain_feed, n_epochs=1)

        while True:
            try:
                if config.adversary:
                    # adversary trains in inner loop: one joint (predictor) step every
                    # K steps, adversary-only steps otherwise.
                    if global_step % config.K == 0:
                        sess.run(vDNN.joint_train_op, feed_dict=train_feed)
                    else:
                        sess.run(vDNN.adversary_train_op, feed_dict=train_feed)

                    if global_step % (config.steps_per_epoch) == 0:
                        epoch, v_auc_best = run_adv_diagnostics(vDNN, config, directories, sess, saver, train_handle, test_handle,
                                                            global_step, config.nTrainExamples, start_time, v_auc_best)
                        plot_distributions(vDNN, epoch, sess, handle=plot_handle)
                else:
                    # Run X steps on training dataset
                    sess.run(vDNN.predictor_train_op, feed_dict=train_feed)

                    if global_step % (config.steps_per_epoch // 4) == 0:
                        epoch, v_auc_best = run_diagnostics(vDNN, config, directories, sess, saver, train_handle, test_handle,
                                                            global_step, config.nTrainExamples, start_time, v_auc_best)

                # Advance after the diagnostics check so that diagnostics also run at step 0.
                global_step += 1

            except tf.errors.OutOfRangeError:
                # Training iterator exhausted after config.num_epochs passes.
                break

        save_path = saver.save(sess, os.path.join(directories.checkpoints, 'vDNN_{}_{}_end.ckpt'.format(config.mode, config.channel)),
                               global_step=epoch)
    print("Training Complete. Model saved to file: {} Time elapsed: {:.3f} s".format(save_path, time.time()-start_time))

# Train from scratch; call train(config, restore=True) instead to start from the
# latest checkpoint found in directories.checkpoints.
train(config)

Architecture: B2Xsy - adv-selu | Layers: 7 | Dropout: 0.95 | Base LR: 1e-05 | Epochs: 512
Building deltae adversary for background events.
Building mbc adversary for background events.
Graph saved to file: checkpoints/vDNN_adv-selu_B2Xsy_epoch0.ckpt-0
Epoch 0, Step 1321728 | Training Acc: 0.928 | Test Acc: 0.965 | Test Loss: 0.116 | Test AUC: 0.919 | Total loss: -8.198 | Rate: 295 examples/s (53.39 s) [*]
Post-selection: 75
Normalized BG deviation:  1.49418758256
Post-selection: 75
Normalized BG deviation:  1.46763540291
Epoch 1, Step 1321491 | Training Acc: 0.954 | Test Acc: 0.918 | Test Loss: 0.309 | Test AUC: 0.885 | Total loss: -7.484 | Rate: 1702 examples/s (113.03 s) 
Post-selection: 75
Normalized BG deviation:  1.41445927106
Post-selection: 75
Normalized BG deviation:  1.48496315475
Epoch 2, Step 1321254 | Training Acc: 0.966 | Test Acc: 0.930 | Test Loss: 0.170 | Test AUC: 0.884 | Total loss: -7.115 | Rate: 1795 examples/s (168.05 s) 
Post-selection: 76
Normalized BG deviation:  1.51754385965
Post-selection: 76
Normalized BG deviation:  1.51610369207
Epoch 3, Step 1321017 | Training Acc: 0.958 | Test Acc: 0.941 | Test Loss: 0.148 | Test AUC: 0.919 | Total loss: -6.754 | Rate: 1815 examples/s (225.12 s) 
Post-selection: 76
Normalized BG deviation:  1.54997705971
Post-selection: 76
Normalized BG deviation:  1.48718620961
Epoch 4, Step 1320780 | Training Acc: 0.945 | Test Acc: 0.945 | Test Loss: 0.188 | Test AUC: 0.914 | Total loss: -7.700 | Rate: 1822 examples/s (280.25 s) 
Post-selection: 76
Normalized BG deviation:  1.47230759171
Post-selection: 76
Normalized BG deviation:  1.49192440986
Epoch 5, Step 1320543 | Training Acc: 0.937 | Test Acc: 0.957 | Test Loss: 0.174 | Test AUC: 0.899 | Total loss: -7.259 | Rate: 1815 examples/s (335.90 s) 
Post-selection: 75
Normalized BG deviation:  1.51562189055
Post-selection: 75
Normalized BG deviation:  1.50262023217
Graph saved to file: best/vDNN_adv-selu_B2Xsy_epoch6.ckpt-6
Epoch 6, Step 1320306 | Training Acc: 0.954 | Test Acc: 0.957 | Test Loss: 0.185 | Test AUC: 0.925 | Total loss: -7.014 | Rate: 402 examples/s (390.52 s) [*]
Post-selection: 75
Normalized BG deviation:  1.58318413898
Post-selection: 75
Normalized BG deviation:  1.47523373782
Epoch 7, Step 1320069 | Training Acc: 0.958 | Test Acc: 0.945 | Test Loss: 0.172 | Test AUC: 0.923 | Total loss: -6.852 | Rate: 1822 examples/s (445.18 s) 
Post-selection: 75
Normalized BG deviation:  1.49705434567
Post-selection: 75
Normalized BG deviation:  1.49440656649
Epoch 8, Step 1319832 | Training Acc: 0.937 | Test Acc: 0.961 | Test Loss: 0.061 | Test AUC: 0.919 | Total loss: -7.074 | Rate: 1804 examples/s (499.33 s) 
Post-selection: 75
Normalized BG deviation:  1.51717305152
Post-selection: 75
Normalized BG deviation:  1.52232496697
Graph saved to file: best/vDNN_adv-selu_B2Xsy_epoch9.ckpt-9
Epoch 9, Step 1319595 | Training Acc: 0.945 | Test Acc: 0.949 | Test Loss: 0.153 | Test AUC: 0.926 | Total loss: -8.610 | Rate: 387 examples/s (553.67 s) [*]
Post-selection: 75
Normalized BG deviation:  1.51736041957
Post-selection: 75
Normalized BG deviation:  1.54126004116
Graph saved to file: best/vDNN_adv-selu_B2Xsy_epoch10.ckpt-10
Epoch 10, Step 1319358 | Training Acc: 0.970 | Test Acc: 0.957 | Test Loss: 0.124 | Test AUC: 0.948 | Total loss: -7.649 | Rate: 276 examples/s (609.67 s) [*]
Post-selection: 76
Normalized BG deviation:  1.45941345902
Post-selection: 76
Normalized BG deviation:  1.52422100026
Graph saved to file: best/vDNN_adv-selu_B2Xsy_epoch11.ckpt-11
Epoch 11, Step 1319121 | Training Acc: 0.975 | Test Acc: 0.973 | Test Loss: 0.076 | Test AUC: 0.965 | Total loss: -6.864 | Rate: 393 examples/s (663.86 s) [*]
Post-selection: 76
Normalized BG deviation:  1.52992069214
Post-selection: 76
Normalized BG deviation:  1.51471455725
Epoch 12, Step 1318884 | Training Acc: 0.954 | Test Acc: 0.965 | Test Loss: 0.105 | Test AUC: 0.945 | Total loss: -7.687 | Rate: 1777 examples/s (718.20 s) 
Post-selection: 76
Normalized BG deviation:  1.51520303407
Post-selection: 76
Normalized BG deviation:  1.46406852808
Epoch 13, Step 1318647 | Training Acc: 0.937 | Test Acc: 0.945 | Test Loss: 0.219 | Test AUC: 0.911 | Total loss: -8.724 | Rate: 1862 examples/s (772.63 s) 
Post-selection: 75
Normalized BG deviation:  1.4912106136
Post-selection: 75
Normalized BG deviation:  1.50898839138
Epoch 14, Step 1318410 | Training Acc: 0.937 | Test Acc: 0.934 | Test Loss: 0.140 | Test AUC: 0.928 | Total loss: -7.767 | Rate: 1800 examples/s (827.31 s) 
Post-selection: 75
Normalized BG deviation:  1.58159273258
Post-selection: 75
Normalized BG deviation:  1.52164975797
Epoch 15, Step 1318173 | Training Acc: 0.920 | Test Acc: 0.973 | Test Loss: 0.118 | Test AUC: 0.928 | Total loss: -7.739 | Rate: 1785 examples/s (882.41 s) 
Post-selection: 75
Normalized BG deviation:  1.49083206461
Post-selection: 75
Normalized BG deviation:  1.51783941219
Graph saved to file: checkpoints/vDNN_adv-selu_B2Xsy_epoch16.ckpt-16
Epoch 16, Step 1317936 | Training Acc: 0.970 | Test Acc: 0.965 | Test Loss: 0.099 | Test AUC: 0.954 | Total loss: -8.630 | Rate: 369 examples/s (938.07 s) 
Post-selection: 75
Normalized BG deviation:  1.50779392338
Post-selection: 75
Normalized BG deviation:  1.53051519155

# Continue training, starting from the latest checkpoint in directories.checkpoints.
train(config, restore=True)

Architecture: B2Xsy - adv-selu | Layers: 7 | Dropout: 0.95 | Base LR: 1e-05 | Epochs: 512
INFO:tensorflow:Restoring parameters from checkpoints/vDNN_adv-selu_B2Xsy_epoch48.ckpt-48
checkpoints/vDNN_adv-selu_B2Xsy_epoch48.ckpt-48 restored.
Graph saved to file: checkpoints/vDNN_adv-selu_B2Xsy_epoch0.ckpt-0
Epoch 0, Step 1321728 | Training Acc: 0.928 | Test Acc: 0.945 | Test Loss: 0.187 | Test AUC: 0.882 | Total loss: -10.989 | Rate: 299 examples/s (38.80 s) [*]
Post-selection: 6064
Epoch 1, Step 1321491 | Training Acc: 0.941 | Test Acc: 0.965 | Test Loss: 0.124 | Test AUC: 0.929 | Total loss: -14.267 | Rate: 1619 examples/s (79.92 s) [*]
Post-selection: 6064
Epoch 2, Step 1321254 | Training Acc: 0.907 | Test Acc: 0.914 | Test Loss: 0.301 | Test AUC: 0.866 | Total loss: -22.281 | Rate: 1747 examples/s (119.02 s) 
Post-selection: 6064
Epoch 3, Step 1321017 | Training Acc: 0.937 | Test Acc: 0.930 | Test Loss: 0.263 | Test AUC: 0.896 | Total loss: -21.931 | Rate: 1694 examples/s (157.98 s) 
Post-selection: 6064
Epoch 4, Step 1320780 | Training Acc: 0.924 | Test Acc: 0.910 | Test Loss: 0.249 | Test AUC: 0.880 | Total loss: -19.028 | Rate: 1787 examples/s (196.81 s) 
Post-selection: 6064
Epoch 5, Step 1320543 | Training Acc: 0.945 | Test Acc: 0.941 | Test Loss: 0.147 | Test AUC: 0.904 | Total loss: -17.739 | Rate: 1728 examples/s (235.63 s) 
Post-selection: 6064
Epoch 6, Step 1320306 | Training Acc: 0.911 | Test Acc: 0.965 | Test Loss: 0.101 | Test AUC: 0.898 | Total loss: -6.515 | Rate: 1765 examples/s (274.23 s) 
Post-selection: 6064
Epoch 7, Step 1320069 | Training Acc: 0.920 | Test Acc: 0.926 | Test Loss: 0.239 | Test AUC: 0.883 | Total loss: -0.150 | Rate: 1764 examples/s (313.15 s) 
Post-selection: 6064
Epoch 8, Step 1319832 | Training Acc: 0.945 | Test Acc: 0.922 | Test Loss: 0.175 | Test AUC: 0.892 | Total loss: -17.710 | Rate: 1660 examples/s (351.97 s) 
Post-selection: 6064
Epoch 9, Step 1319595 | Training Acc: 0.916 | Test Acc: 0.953 | Test Loss: 0.138 | Test AUC: 0.890 | Total loss: -11.581 | Rate: 1723 examples/s (390.77 s) 
Post-selection: 6064
Epoch 10, Step 1319358 | Training Acc: 0.907 | Test Acc: 0.930 | Test Loss: 0.223 | Test AUC: 0.843 | Total loss: -15.697 | Rate: 1671 examples/s (429.44 s) 
Post-selection: 6064
Epoch 11, Step 1319121 | Training Acc: 0.916 | Test Acc: 0.941 | Test Loss: 0.193 | Test AUC: 0.888 | Total loss: -18.096 | Rate: 1746 examples/s (468.07 s) 
Post-selection: 6064
Epoch 12, Step 1318884 | Training Acc: 0.941 | Test Acc: 0.965 | Test Loss: 0.141 | Test AUC: 0.910 | Total loss: -9.609 | Rate: 1747 examples/s (506.82 s) 
Post-selection: 6064
Epoch 13, Step 1318647 | Training Acc: 0.941 | Test Acc: 0.930 | Test Loss: 0.210 | Test AUC: 0.903 | Total loss: -12.618 | Rate: 1671 examples/s (546.47 s) 
Post-selection: 6064
Epoch 14, Step 1318410 | Training Acc: 0.949 | Test Acc: 0.918 | Test Loss: 0.275 | Test AUC: 0.898 | Total loss: -24.035 | Rate: 1633 examples/s (585.33 s) 
Post-selection: 6064
Epoch 15, Step 1318173 | Training Acc: 0.962 | Test Acc: 0.941 | Test Loss: 0.151 | Test AUC: 0.913 | Total loss: -12.024 | Rate: 1654 examples/s (625.10 s) 
Post-selection: 6064
Graph saved to file: best/vDNN_adv-selu_B2Xsy_epoch16.ckpt-16
Graph saved to file: checkpoints/vDNN_adv-selu_B2Xsy_epoch16.ckpt-16
Epoch 16, Step 1317936 | Training Acc: 0.945 | Test Acc: 0.969 | Test Loss: 0.107 | Test AUC: 0.936 | Total loss: -17.540 | Rate: 109 examples/s (666.46 s) [*]
Post-selection: 6064

Making Predictions

Classification on a new instance is given by the softmax of the output of the final readout layer.

import os
# Locate the latest checkpoint, rebuild the graph and run inference on the test set.
# NOTE: this rebinds the module-level `labels` to the labels returned by predict().
ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
vDNN = vanillaDNN(config, training = False)
labels, preds, output = vDNN.predict(ckpt)

# Add predictions to the test frame as a new column named 'preds'
# (the frame is written to HDF5 in a later cell).
output = pd.Series(output, name='preds')
test = pd.concat([pdf_test, output], axis=1)

INFO:tensorflow:Restoring parameters from checkpoints/vDNN_adv-selu_B2Xsy_epoch16.ckpt-16
checkpoints/vDNN_adv-selu_B2Xsy_epoch16.ckpt-16 restored.
Validation accuracy: 0.935
AUC: 0.9737824346375283
Plotting signal efficiency versus background rejection
Inference complete. Duration: 20.6718 s

# Split the scored test frame into background (label 0) and signal (label 1).
bkg = test[test['labels']<0.5]
sig = test[test['labels']==1]

# Events surviving a cut on the network output. The prediction column is named
# 'preds' (see the pd.Series created after predict()); 'probs' does not exist.
post_cut = bkg[bkg['preds']>0.9]
post_sig = sig[sig['preds']>0.9]

import matplotlib.pyplot as plt
import seaborn as sns
# Distribution of the network output for true signal events.
sns.distplot(sig['preds'], kde=False)

# Persist the scored test frame next to the checkpoints.
test.to_hdf(os.path.join(directories.checkpoints, 'adv_mbc{}_preds.h5'.format(os.path.basename(directories.test))), key = 'df', format='t', data_columns=True)

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Style sheets are applied in order; later ones override overlapping settings.
plt.style.use('seaborn-darkgrid')
plt.style.use('seaborn-talk')
plt.style.use('seaborn-pastel')

def normPlot(variable, pdf, epoch, signal, nbins=50, bkg_rejection=0.95):
    """Plot the distribution of ``variable`` before and after a cut on the network output.

    The cut is placed so that a fraction ``bkg_rejection`` of background events
    (``labels < 0.5``) is rejected, i.e. the ``1 - bkg_rejection`` highest
    scoring background events are kept. The figure is saved under ``best/``.

    Args:
        variable: column of ``pdf`` to histogram.
        pdf: DataFrame with at least the columns 'labels', 'preds' and
            ``variable``.
        epoch: tag embedded in the output file name.
        signal: if True plot signal events (all vs. passing the cut);
            otherwise plot background events (all vs. passing the cut).
        nbins: number of histogram bins.
        bkg_rejection: fraction of background rejected by the cut.
    """
    titles={'mbc': r'$M_{bc}$ (GeV)', 'deltae': r'$\Delta E$ (GeV)', 'daughterInvM': r'$M_{X_q}$ (GeV)'}

    def hist_style():
        # Fresh dict per call: the plotting routine may add defaults to the dict it is given.
        return dict(edgecolor="0.85", linewidth=0.5, alpha = 0.8)

    bkg = pdf[pdf['labels']<0.5]
    post_bkg = bkg.nlargest(int(bkg.shape[0]*(1-bkg_rejection)), columns=['preds'])
    # Lowest network output among the retained background events.
    threshold = post_bkg['preds'].min()
    print('Post-selection:', post_bkg.shape[0])
    if signal:
        sig = pdf[pdf['labels']==1]
        post_sig = pdf[(pdf['labels']==1) & (pdf['preds']>threshold)]
        sns.distplot(post_sig[variable], hist=True, kde=True, label='Signal - {} BG rejection'.format(bkg_rejection), bins=nbins,
                     hist_kws=hist_style())
        sns.distplot(sig[variable], hist=True, kde=True, label='Signal', bins=nbins,
                     hist_kws=hist_style())
    else:
        # NOTE(review): this `else:` is restored - as the file stood, the background
        # histograms sat inside `if signal:` and signal=False produced an empty figure.
        sns.distplot(post_bkg[variable], hist=True, kde=False, norm_hist=True, label='Background - {} BG rejection'.format(bkg_rejection), bins=nbins,
                     hist_kws=hist_style())
        sns.distplot(bkg[variable], hist=True, kde=True, label='Background', bins=nbins,
                     hist_kws=hist_style())
    # Use the formatted axis title where one is defined, else the column name.
    plt.xlabel(titles.get(variable, variable))
    plt.ylabel(r'Normalized events/bin')
    plt.legend(loc = "best")
    plt.savefig('best/{}_adv-ep{}-nb.pdf'.format(variable, epoch), bbox_inches='tight',format='pdf', dpi=1000)

# Run normPlot on the scored test frame for 'mbc' with signal=False; the `epoch`
# argument is only a tag embedded in the output file name.
normPlot('mbc', test, signal=False, epoch='talk_bkg_mbc_adv-nn')

Post-selection: 6064

