alpha-RNN

Author: Justin Tan

Alpha build of an RNN for rare decay identification, written in TensorFlow.


In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time, os

class config(object):
    # Set network parameters
    # Empirically, network depth matters more than layer width
    mode = 'continuum'
    channel = 'rho0'
    n_particles = 5
    n_features = 100
    seq_length = n_features // n_particles  # features per particle - recomputed after data load
    steps_per_epoch = 1000  # placeholder - recomputed from the training set size
    rnn_cell = 'layer-norm' # one of 'lstm', 'gru', 'layer-norm'
    hidden_units = 512  # Number of neurons per RNN Cell
    keep_prob = 0.85
    input_keep_prob = 0.85
    recurrent_keep_prob = 0.85
    num_epochs = 64
    batch_size = 128
    num_layers = 3 # Note: 3 layers is considered 'deep'
    learning_rate = 1e-4
    lr_epoch_decay = 0.96
    n_classes = 2

class directories(object):
    data = 'data'
    tensorboard = 'tensorboard'
    checkpoints = 'checkpoints'
    samples = 'samples'
    
architecture = '{} - {} | Base cell: {} | Hidden units: {} | Layers: {} | Batch: {} | Epochs: {}'.format(
    config.channel, config.mode, config.rnn_cell, config.hidden_units, config.num_layers, config.batch_size, config.num_epochs)

class reader():
    def __init__(self, df):
        
        self.df = df
        self.batch_size = config.batch_size
        self.steps_per_epoch = len(df) // config.batch_size
        self.epochs = 0
        self.proceed = True
        self.shuffle()

    def shuffle(self):
        self.df = self.df.sample(frac=1).reset_index(drop=True)
        self.df_X = self.df.drop('Labels', axis = 1)
        self.df_y = self.df['Labels']
        self.pointer = 0

    def next_batch(self, batch_size):
        if self.pointer + 1 >= self.steps_per_epoch:
            # Last (possibly partial) batch of the epoch: return it, then
            # reshuffle and signal the caller to stop iterating
            inputs = self.df_X.iloc[self.pointer*batch_size:]
            targets = self.df_y.iloc[self.pointer*batch_size:]
            self.epochs += 1
            self.shuffle()
            self.proceed = False
            return inputs, targets

        inputs = self.df_X.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
        targets = self.df_y.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
        self.pointer += 1

        return inputs, targets
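
# Usage sketch (illustrative): the reader is driven through its `proceed` flag -
# one pass over the frame per epoch, reshuffling at the end, exactly as train()
# does below. `df_train` is built once the data is read in:
#
#   r = reader(df_train)
#   r.proceed = True
#   while r.proceed:
#       x_batch, y_batch = r.next_batch(config.batch_size)
#       # feed x_batch.values / y_batch.values into the graph here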

def save_summary(config, delta_t, train_acc, test_acc):
    import json
    summary = {
        'Timestamp': time.strftime('%c'),
        'Base cell': config.rnn_cell,
        'Hidden units': config.hidden_units,
        'Layers': config.num_layers,
        'Batch_size': config.batch_size,
        'Seq_length': config.seq_length,
        'Dropout': config.keep_prob,
        'Epochs': config.num_epochs,
        'Time': delta_t,
        'Final train acc': train_acc,
        'Final test acc': test_acc
    }
    # Append to the running JSON summary, creating the file on first use
    if os.path.isfile('rnn_summary.json'):
        with open('rnn_summary.json', 'r') as f:
            summaries = json.load(f)
        summaries.append(summary)
        with open('rnn_summary.json', 'w') as f:
            json.dump(summaries, f, indent = 4)
    else:
        with open('rnn_summary.json', 'w') as f:
            json.dump([summary], f, indent = 4)

def load_data(file_name, test_size = 0.05):
    from sklearn.model_selection import train_test_split
    df = pd.read_hdf(file_name, 'df')
    df_X_train, df_X_test, df_y_train, df_y_test = train_test_split(df.drop('Labels', axis = 1),
                                                                    df['Labels'], test_size = test_size, random_state=42)
    return df_X_train, df_X_test, df_y_train, df_y_test
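
Note that load_data expects an HDF5 store holding a single frame under the key 'df', with class labels in a 'Labels' column. A toy example of writing a file in that layout (illustrative column names and sizes only):

toy = pd.DataFrame(np.random.randn(8, 90).astype(np.float32),
                   columns=['f{}'.format(i) for i in range(90)])
toy['Labels'] = np.random.randint(0, config.n_classes, size=8)
toy.to_hdf('toy_rnn.h5', 'df')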

Read Data


In [2]:
data_file = '/home/ubuntu/radiative/data/rnn/rnn_B02rho0gamma_continuum.h5'

df_X_train, df_X_test, df_y_train, df_y_test = load_data(data_file)
config.n_features = df_X_train.shape[1]
config.seq_length = config.n_features//config.n_particles
config.steps_per_epoch = len(df_X_train) // config.batch_size
assert config.seq_length == config.n_features/config.n_particles, 'Discrepancy in input feature dimension'

df_train = pd.concat([df_X_train, df_y_train], axis = 1)
df_test = pd.concat([df_X_test, df_y_test], axis = 1)

In [3]:
readerTrain = reader(df_train)
readerTest = reader(df_test)

In [20]:
# Inspect one event reshaped into its (n_particles, seq_length) sequence
x, _ = readerTrain.next_batch(config.batch_size)
x.values.reshape([-1, config.n_particles, config.seq_length])[0]


Out[20]:
array([[  5.12394619e+00,  -7.94767961e-02,  -3.48720968e-01,
          6.89857081e-02,   3.57663065e-01,   3.64255279e-01,
         -1.79487824e+00,   1.89388365e-01,   1.48642960e-03,
          1.24982413e-04,   7.74669519e-04,   2.25706212e-03,
          1.34642087e-02,   5.65924263e+00,   1.54790401e+00,
          2.96310177e+01,   1.13851771e-01,   5.11098242e+00],
       [  2.61728239e+00,   2.04600549e+00,  -4.84532654e-01,
          1.39081919e+00,   2.10259604e+00,   2.52096987e+00,
         -2.32534915e-01,   5.51700056e-01,   1.53519592e-04,
          7.49543105e-05,   5.93450794e-04,   7.81479699e-04,
          5.92091560e-01,   7.80361032e+00,   1.96310759e+00,
          5.70425415e+01,  -7.25154400e-01,   7.03476071e-01],
       [  2.05967164e+00,   1.79474211e+00,  -2.70597786e-01,
          9.63558376e-01,   1.81502676e+00,   2.05493712e+00,
         -1.49645388e-01,   4.68899161e-01,   1.85109839e-05,
          3.85362773e-06,   1.39815065e-05,   2.92153127e-05,
          5.07277668e-01,   8.51536846e+00,   2.03612328e+00,
          6.83656921e+01,  -4.76340562e-01,   1.39570177e-01],
       [  5.55337667e-01,   2.50287563e-01,  -2.13602960e-01,
          4.25029397e-01,   3.29044223e-01,   5.37512839e-01,
         -7.06482053e-01,   7.90733457e-01,   1.35387323e-04,
          7.06458086e-05,   5.85658359e-04,   7.56367692e-04,
          1.00901210e+00,   1.02972803e+01,   2.41617870e+00,
          1.00196068e+02,  -7.75152564e-01,   1.39570177e-01],
       [  2.50666380e+00,  -2.12548232e+00,   1.35811701e-01,
         -1.32183349e+00,   2.12981701e+00,   2.50666380e+00,
          3.07778263e+00,  -5.27327776e-01,   1.33290945e-03,
          5.00281240e-05,   1.81218755e-04,   1.47558260e-03,
         -5.86436391e-01,   8.86163235e+00,   4.43173409e+00,
          5.88882523e+01,   1.00000000e+00,   0.00000000e+00]], dtype=float32)
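
Each row above is one particle's 18-component feature vector: dynamic_rnn treats the particle axis as time, so the network reads an event one particle per step, five steps in total. A quick shape check on the batch drawn above (sizes assume the 90-feature input used here):

seq = x.values.reshape([-1, config.n_particles, config.seq_length])
print(seq.shape)  # (128, 5, 18): batch x time steps (particles) x features per step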

RNN construction


In [4]:
def cell_dropout(base_cell, keep_prob):
    # Apply dropout between RNN layers - on the output connections only
    return tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)

def layer_weights(shape, name = 'weights'):
    # Return weight tensor of given shape using Xavier initialization
    W = tf.get_variable(name, shape = shape, initializer=tf.contrib.layers.xavier_initializer())
    return W

def layer_biases(shape, name = 'biases'):
    # Return bias tensor of given shape with small initialized constant value
    b = tf.get_variable(name, shape = shape, initializer = tf.constant_initializer(0.01))
    return b

class alphaRNN():
    def __init__(self, config, training = True):
        self.config = config
        self.scope = 'alpha'
                
        # Placeholders for feed_dict
        self.inputs = tf.placeholder(tf.float32, shape = [None, self.config.n_features])
        self.targets = tf.placeholder(tf.int32, shape = [None])
        self.keep_prob = tf.placeholder(tf.float32) # Dropout on input connections
        
        # Reshape input to batch_size x n_particles x seq_length tensor
        rnn_inputs = tf.reshape(self.inputs, [-1, config.n_particles, config.seq_length])
        
        # Place operations necessary to perform inference onto the graph.
        # Build one cell per layer - reusing a single cell instance in
        # MultiRNNCell would tie the weights of every layer together
        def make_cell():
            if config.rnn_cell == 'lstm':
                cell = tf.contrib.rnn.LSTMCell(num_units = config.hidden_units, forget_bias = 1.0, state_is_tuple = True)
            elif config.rnn_cell == 'gru':
                cell = tf.contrib.rnn.GRUCell(num_units = config.hidden_units)
            else:  # 'layer-norm' and the default case
                cell = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units = config.hidden_units, forget_bias = 1.0,
                                                             dropout_keep_prob = self.config.recurrent_keep_prob)
            # Apply dropout on non-recurrent (input) connections only
            if training and self.config.input_keep_prob < 1:
                cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = self.config.input_keep_prob)
            return cell

        if training and self.config.input_keep_prob < 1:
            rnn_inputs = tf.nn.dropout(rnn_inputs, self.keep_prob)

        # Wrap the stacked cells into a single multi-layer cell
        self.multicell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(config.num_layers)], state_is_tuple=True)

        # Zero initial hidden state; infer the batch size from the input tensor
        # so that full-dataset evaluation works alongside mini-batch training
        self.init_state = self.multicell.zero_state(tf.shape(rnn_inputs)[0], tf.float32)

        # Outputs shaped [batch_size, max_time, cell.output_size]
        rnn_outputs, final_state = tf.nn.dynamic_rnn(
            cell = self.multicell, inputs = rnn_inputs, initial_state = self.init_state, scope = self.scope)
        
        # Extract output from last time step
        output = rnn_outputs[:,-1,:]

        with tf.variable_scope('softmax'):
            softmax_W = layer_weights(shape = [config.hidden_units, config.n_classes], name = 'smx_W')
            softmax_b = layer_biases(shape = [config.n_classes], name = 'smx_b')

        self.logits_RNN = tf.matmul(output, softmax_W) + softmax_b  # Unnormalized log-probabilities over the two classes
        self.predictions = tf.nn.softmax(self.logits_RNN)
        
        self.cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = self.logits_RNN, labels = self.targets))
        tf.summary.scalar('cross_entropy', self.cross_entropy)
        
        # Anneal the learning rate once per epoch
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(config.learning_rate, global_step,
                                                       decay_steps = config.steps_per_epoch, decay_rate = config.lr_epoch_decay, staircase=True)

        # Pass the decayed rate to the optimizer - otherwise the schedule is never applied
        self.train_op = tf.train.AdamOptimizer(learning_rate).minimize(self.cross_entropy, name = 'optimizer',
                                                                       global_step = global_step)
        
        # Evaluate correctness
        correct_prediction = tf.equal(tf.cast(tf.argmax(self.predictions, 1), tf.int32), self.targets)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)
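
Before training, the graph can be smoke-tested on random inputs to confirm the output shapes; a minimal sketch using dummy data and untrained weights:

tf.reset_default_graph()
model = alphaRNN(config, training=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_dummy = np.random.randn(config.batch_size, config.n_features).astype(np.float32)
    y_dummy = np.zeros(config.batch_size, dtype=np.int32)
    probs = sess.run(model.predictions, feed_dict={model.inputs: x_dummy,
                                                   model.targets: y_dummy,
                                                   model.keep_prob: 1.0})
    print(probs.shape)  # (128, 2): one softmax distribution per event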

In [5]:
def train(config, restore = False):
    
    pRNN = alphaRNN(config, training = True)
    start_time = time.time()
    v_acc_best = 0.
    saver = tf.train.Saver()
    merge_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(
        os.path.join(directories.tensorboard, 'train_{}'.format(time.strftime('%d-%m_%I:%M'))), graph = tf.get_default_graph())
    test_writer = tf.summary.FileWriter(os.path.join(directories.tensorboard, 'test_{}'.format(time.strftime('%d-%m_%I:%M'))))
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
    
    with tf.Session() as sess:
        # Initialize variables
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        
        if restore and ckpt and ckpt.model_checkpoint_path:
            print('{} restored.'.format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
                    
        for epoch in range(config.num_epochs):
            
            readerTrain.proceed = True
            begin = time.time()
            step = 0
            print('(*) Entering Epoch {} ({:.3f} s)'.format(epoch, time.time() - start_time))

            # Save every 10 epochs    
            if epoch % 10 == 0:
                save_path = saver.save(sess,
                                       os.path.join(directories.checkpoints,'pRNN_{}_{}_epoch{}.ckpt'.format(config.mode, config.channel, epoch)),
                                       global_step = epoch)
                print('Graph saved to file: {}'.format(save_path))

            while readerTrain.proceed:
                # Iterate through entire corpus
                x_train, y_train = readerTrain.next_batch(config.batch_size)
                feed_dict_train = {pRNN.inputs: x_train.values, pRNN.targets: y_train.values, pRNN.keep_prob: config.keep_prob}
                t_op = sess.run(pRNN.train_op, feed_dict = feed_dict_train)
                step += 1

                if step % (config.steps_per_epoch // 10) == 0:            
                    # Evaluate model
                    improved = ''
                    x_test, y_test = readerTest.next_batch(config.batch_size)
                    feed_dict_test = {pRNN.inputs: x_test.values, pRNN.targets: y_test.values, pRNN.keep_prob: 1.0}

                    # Note: training metrics are evaluated with dropout still active
                    t_acc, t_loss, t_summary = sess.run([pRNN.accuracy, pRNN.cross_entropy, merge_op],
                                                        feed_dict = feed_dict_train)
                    v_acc, v_loss, v_summary = sess.run([pRNN.accuracy, pRNN.cross_entropy, merge_op],
                                                        feed_dict = feed_dict_test)

                    train_writer.add_summary(t_summary, step)
                    test_writer.add_summary(v_summary, step)
                    
                    if epoch > 5 and v_acc > v_acc_best:
                        v_acc_best = v_acc
                        improved = '*'
                        save_path = saver.save(sess, os.path.join(directories.checkpoints, 'best.ckpt'), global_step = epoch)
                    
                    print('Epoch {}, Step {} | Training Accuracy: {:.3f} | Test Accuracy: {:.3f} | Training Loss: {:.3f} | Test Loss {:.3f} {}'
                          .format(epoch, step, t_acc, v_acc, t_loss, v_loss, improved))

        save_path = saver.save(sess, os.path.join(directories.checkpoints, 'pRNN_end'),
                               global_step = epoch)
        print('Metagraph saved to file: {}'.format(save_path))

        final_train_accuracy = pRNN.accuracy.eval(feed_dict = {pRNN.inputs: df_X_train.values, pRNN.targets: df_y_train.values, pRNN.keep_prob: 1.0})
        final_test_accuracy = pRNN.accuracy.eval(feed_dict = {pRNN.inputs: df_X_test.values, pRNN.targets: df_y_test.values, pRNN.keep_prob: 1.0})
        delta_t = time.time() - start_time
            
    print("Training Complete. Time elapsed: {:.3f} s".format(delta_t))
    print("Train accuracy: %g\nValidation accuracy: %g" %(final_train_accuracy, final_test_accuracy))

    print('Architecture: {}'.format(architecture))
    save_summary(config, delta_t, final_train_accuracy, final_test_accuracy)

In [ ]:
train(config)#, restore = True)


(*) Entering Epoch 0 (2.085 s)
Graph saved to file: checkpoints/pRNN_continuum_rho0_epoch0.ckpt-0
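
Once a run finishes, a saved checkpoint can be restored for further evaluation; a minimal sketch that rebuilds the graph and loads the most recent checkpoint (point saver.restore at checkpoints/best.ckpt-<epoch> to pick the best-validation weights instead):

tf.reset_default_graph()
pRNN = alphaRNN(config, training=False)
saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
    saver.restore(sess, ckpt.model_checkpoint_path)
    test_acc = pRNN.accuracy.eval(feed_dict={pRNN.inputs: df_X_test.values,
                                             pRNN.targets: df_y_test.values,
                                             pRNN.keep_prob: 1.0})
    print('Restored test accuracy: {:.3f}'.format(test_acc))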