Author: Justin Tan
RNN alpha build for rare decay identification in TensorFlow.
In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time, os
class config(object):
# Set network parameters
    # Empirically, network depth matters more than layer width (output dimension)
mode = 'continuum'
channel = 'rho0'
n_particles = 5
n_features = 100
    seq_length = n_features // n_particles
    steps_per_epoch = 1000 # Placeholder; recomputed from the training set after loading data
    rnn_cell = 'lru_cell' # 'lstm', 'gru', 'layer-norm'; other values fall back to a layer-normalized LSTM
hidden_units = 512 # Number of neurons per RNN Cell
keep_prob = 0.85
input_keep_prob = 0.85
recurrent_keep_prob = 0.85
num_epochs = 64
batch_size = 128
num_layers = 3 # Note: 3 layers is considered 'deep'
learning_rate = 1e-4
lr_epoch_decay = 0.96
n_classes = 2
class directories(object):
data = 'data'
tensorboard = 'tensorboard'
checkpoints = 'checkpoints'
samples = 'samples'
architecture = '{} - {} | Base cell: {} | Hidden units: {} | Layers: {} | Batch: {} | Epochs: {}'.format(
config.channel, config.mode, config.rnn_cell, config.hidden_units, config.num_layers, config.batch_size, config.num_epochs)
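# For the default config above, this renders as:
# 'rho0 - continuum | Base cell: lru_cell | Hidden units: 512 | Layers: 3 | Batch: 128 | Epochs: 64'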
class reader():
def __init__(self, df):
self.df = df
self.batch_size = config.batch_size
self.steps_per_epoch = len(df) // config.batch_size
self.epochs = 0
self.proceed = True
self.shuffle()
def shuffle(self):
self.df = self.df.sample(frac=1).reset_index(drop=True)
self.df_X = self.df.drop('Labels', axis = 1)
self.df_y = self.df['Labels']
self.pointer = 0
    def next_batch(self, batch_size):
        # Final (possibly partial) batch of the epoch: return the remainder,
        # reshuffle and flag the end of the epoch
        if self.pointer + 1 >= self.steps_per_epoch:
            inputs = self.df_X.iloc[self.pointer*batch_size:]
            targets = self.df_y.iloc[self.pointer*batch_size:]
            self.epochs += 1
            self.shuffle()
            self.proceed = False
            return inputs, targets
        inputs = self.df_X.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
        targets = self.df_y.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
        self.pointer += 1
        return inputs, targets
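# Usage sketch for the reader (illustrative only; it mirrors the training loop below
# and assumes a DataFrame with a 'Labels' column):
#   r = reader(df_train)
#   while r.proceed:
#       x_batch, y_batch = r.next_batch(config.batch_size)  # [batch_size, n_features] features and labels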
def save_summary(config, delta_t, train_acc, test_acc):
import json
summary = {
'Timestamp': time.strftime('%c'),
'Base cell': config.rnn_cell,
'Hidden units': config.hidden_units,
'Layers': config.num_layers,
'Batch_size': config.batch_size,
'Seq_length': config.seq_length,
'Dropout': config.keep_prob,
'Epochs': config.num_epochs,
'Time': delta_t,
'Final train acc': train_acc,
'Final test acc': test_acc
}
# Writing JSON data
if os.path.isfile('rnn_summary.json'):
        with open('rnn_summary.json', 'r') as f:
new = json.load(f)
new.append(summary)
with open('rnn_summary.json', 'w') as f:
json.dump(new, f, indent = 4)
else:
with open('rnn_summary.json', 'w') as f:
json.dump([summary], f, indent = 4)
def load_data(file_name, test_size = 0.05):
from sklearn.model_selection import train_test_split
df = pd.read_hdf(file_name, 'df')
df_X_train, df_X_test, df_y_train, df_y_test = train_test_split(df.drop('Labels', axis = 1),
df['Labels'], test_size = test_size, random_state=42)
return df_X_train, df_X_test, df_y_train, df_y_test
In [2]:
test_file = '/home/ubuntu/radiative/data/rnn/rnn_B02rho0gamma_continuum.h5'
df_X_train, df_X_test, df_y_train, df_y_test = load_data(test_file)
config.n_features = df_X_train.shape[1]
config.seq_length = config.n_features//config.n_particles
config.steps_per_epoch = len(df_X_train) // config.batch_size
assert config.n_features % config.n_particles == 0, 'n_features ({}) must divide evenly among {} particles'.format(config.n_features, config.n_particles)
df_train = pd.concat([df_X_train, df_y_train], axis = 1)
df_test = pd.concat([df_X_test, df_y_test], axis = 1)
In [3]:
readerTrain = reader(df_train)
readerTest = reader(df_test)
In [20]:
# Sanity check: view one event reshaped into the [n_particles, seq_length] frame fed to the RNN
df_X_train.values.reshape([-1, config.n_particles, config.seq_length])[0]
In [4]:
def cell_dropout(base_cell, keep_prob):
# Apply dropout between RNN layers - only on the output
cell_dropout = tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)
return cell_dropout
def layer_weights(shape, name = 'weights'):
# Return weight tensor of given shape using Xavier initialization
W = tf.get_variable(name, shape = shape, initializer=tf.contrib.layers.xavier_initializer())
return W
def layer_biases(shape, name = 'biases'):
# Return bias tensor of given shape with small initialized constant value
b = tf.get_variable(name, shape = shape, initializer = tf.constant_initializer(0.01))
return b
class alphaRNN():
def __init__(self, config, training = True):
self.config = config
self.scope = 'alpha'
# Placeholders for feed_dict
self.inputs = tf.placeholder(tf.float32, shape = [None, self.config.n_features])
self.targets = tf.placeholder(tf.int32, shape = [None])
self.keep_prob = tf.placeholder(tf.float32) # Dropout on input connections
# Reshape input to batch_size x n_particles x seq_length tensor
rnn_inputs = tf.reshape(self.inputs, [-1, config.n_particles, config.seq_length])
        # Place operations necessary to perform inference onto graph.
        # Build a fresh cell per layer: reusing a single cell object across
        # layers breaks variable scoping inside MultiRNNCell.
        def make_cell():
            if config.rnn_cell == 'lstm':
                cell = tf.contrib.rnn.LSTMCell(num_units = config.hidden_units, forget_bias = 1.0, state_is_tuple = True)
            elif config.rnn_cell == 'gru':
                cell = tf.contrib.rnn.GRUCell(num_units = config.hidden_units)
            else: # 'layer-norm' or any other value falls back to a layer-normalized LSTM
                cell = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units = config.hidden_units,
                    forget_bias = 1.0, dropout_keep_prob = self.config.recurrent_keep_prob)
            # Apply dropout to non-recurrent (input) connections, driven by the keep_prob
            # placeholder so it can be switched off at evaluation time
            if training and self.config.input_keep_prob < 1:
                cell = tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = self.keep_prob)
            return cell
        self.cell = make_cell()
        # Wrap stacked cells into a single cell
        self.multicell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(config.num_layers)], state_is_tuple = True)
        # Zero initial hidden state, sized dynamically to the current batch
        self.init_state = self.multicell.zero_state(tf.shape(self.inputs)[0], tf.float32)
# Outputs shaped [batch_size, max_time, cell.output_size]
rnn_outputs, final_state = tf.nn.dynamic_rnn(
cell = self.multicell, inputs = rnn_inputs, initial_state = self.init_state, scope = self.scope)
# Extract output from last time step
output = rnn_outputs[:,-1,:]
with tf.variable_scope('softmax'):
softmax_W = layer_weights(shape = [config.hidden_units, config.n_classes], name = 'smx_W')
softmax_b = layer_biases(shape = [config.n_classes], name = 'smx_b')
            self.logits_RNN = tf.matmul(output, softmax_W) + softmax_b # Unnormalized log-probabilities over the output classes
self.predictions = tf.nn.softmax(self.logits_RNN)
self.cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = self.logits_RNN, labels = self.targets))
tf.summary.scalar('cross_entropy', self.cross_entropy)
# Anneal learning rate
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(config.learning_rate, global_step,
            decay_steps = config.steps_per_epoch, decay_rate = config.lr_epoch_decay, staircase=True)
        # Use the decayed rate (not the constant config value) in the optimizer
        self.train_op = tf.train.AdamOptimizer(learning_rate).minimize(self.cross_entropy, name = 'optimizer',
            global_step = global_step)
# Evaluate correctness
correct_prediction = tf.equal(tf.cast(tf.argmax(self.predictions, 1), tf.int32), self.targets)
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', self.accuracy)
In [5]:
def train(config, restore = False):
pRNN = alphaRNN(config, training = True)
start_time = time.time()
v_acc_best = 0.
saver = tf.train.Saver()
merge_op = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(
os.path.join(directories.tensorboard, 'train_{}'.format(time.strftime('%d-%m_%I:%M'))), graph = tf.get_default_graph())
test_writer = tf.summary.FileWriter(os.path.join(directories.tensorboard, 'test_{}'.format(time.strftime('%d-%m_%I:%M'))))
ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
with tf.Session() as sess:
# Initialize variables
init_op = tf.global_variables_initializer()
sess.run(init_op)
        if restore and ckpt and ckpt.model_checkpoint_path:
            print('{} restored.'.format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
for epoch in range(config.num_epochs):
readerTrain.proceed = True
begin = time.time()
step = 0
print('(*) Entering Epoch {} ({:.3f} s)'.format(epoch, time.time() - start_time))
# Save every 10 epochs
if epoch % 10 == 0:
save_path = saver.save(sess,
os.path.join(directories.checkpoints,'pRNN_{}_{}_epoch{}.ckpt'.format(config.mode, config.channel, epoch)),
global_step = epoch)
print('Graph saved to file: {}'.format(save_path))
while(readerTrain.proceed):
# Iterate through entire corpus
x_train, y_train = readerTrain.next_batch(config.batch_size)
feed_dict_train = {pRNN.inputs: x_train.values, pRNN.targets: y_train.values, pRNN.keep_prob: config.keep_prob}
t_op = sess.run(pRNN.train_op, feed_dict = feed_dict_train)
step += 1
if step % (config.steps_per_epoch // 10) == 0:
# Evaluate model
improved = ''
x_test, y_test = readerTest.next_batch(config.batch_size)
feed_dict_test = {pRNN.inputs: x_test.values, pRNN.targets: y_test.values, pRNN.keep_prob: 1.0}
t_acc, t_loss, t_summary = sess.run([pRNN.accuracy, pRNN.cross_entropy, merge_op],
feed_dict = feed_dict_train)
                    v_acc, v_loss, v_summary = sess.run([pRNN.accuracy, pRNN.cross_entropy, merge_op],
feed_dict = feed_dict_test)
train_writer.add_summary(t_summary, step)
test_writer.add_summary(v_summary, step)
if epoch > 5 and v_acc > v_acc_best:
v_acc_best = v_acc
improved = '*'
save_path = saver.save(sess, os.path.join(directories.checkpoints, 'best.ckpt'), global_step = epoch)
print('Epoch {}, Step {} | Training Accuracy: {:.3f} | Test Accuracy: {:.3f} | Training Loss: {:.3f} | Test Loss {:.3f} {}'
.format(epoch, step, t_acc, v_acc, t_loss, v_loss, improved))
save_path = saver.save(sess, os.path.join(directories.checkpoints, 'pRNN_end'),
global_step = epoch)
print('Metagraph saved to file: {}'.format(save_path))
final_train_accuracy = pRNN.accuracy.eval(feed_dict = {pRNN.inputs: df_X_train.values, pRNN.targets: df_y_train.values, pRNN.keep_prob: 1.0})
final_test_accuracy = pRNN.accuracy.eval(feed_dict = {pRNN.inputs: df_X_test.values, pRNN.targets: df_y_test.values, pRNN.keep_prob: 1.0})
delta_t = time.time() - start_time
print("Training Complete. Time elapsed: {:.3f} s".format(delta_t))
print("Train accuracy: %g\nValidation accuracy: %g" %(final_train_accuracy, final_test_accuracy))
print('Architecture: {}'.format(architecture))
save_summary(config, delta_t, final_train_accuracy, final_test_accuracy)
In [ ]:
train(config) # train(config, restore = True) to resume from the latest checkpoint
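A minimal restore-and-evaluate sketch, not part of the original run: assuming checkpoints written by train() exist under the checkpoints directory, the graph can be rebuilt and scored on the held-out set with dropout disabled (the keep_prob feed of 1.0 and training = False below are assumptions about the intended evaluation behaviour).
In [ ]:
# Hypothetical evaluation cell: restore the latest checkpoint and score the test set
tf.reset_default_graph()
eval_model = alphaRNN(config, training = False)
saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
    assert ckpt and ckpt.model_checkpoint_path, 'No checkpoint found in {}'.format(directories.checkpoints)
    saver.restore(sess, ckpt.model_checkpoint_path)
    test_acc = eval_model.accuracy.eval(feed_dict = {eval_model.inputs: df_X_test.values,
        eval_model.targets: df_y_test.values, eval_model.keep_prob: 1.0})
    print('Restored test accuracy: {:.3f}'.format(test_acc))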