Author: Justin Tan
To-do: Update to TF 1.2, fused batch norm
Implements relational network as described in https://arxiv.org/pdf/1706.01427.pdf
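The composite function from the paper is $\mathrm{RN}(O) = f_{\phi}\left(\sum_{i,j} g_{\theta}(o_i, o_j)\right)$: an MLP $g_{\theta}$ scores each ordered pair of objects, the pair outputs are summed, and a second MLP $f_{\phi}$ maps the pooled representation to class logits. Here each event's flat feature vector is reshaped into a fixed object set (see `relationalNN.__init__` below).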
In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time, os
import json
class config(object):
# Set network parameters
mode = 'continuum'
channel = 'rho0'
n_features = 100
keep_prob = 0.85
num_epochs = 128
batch_size = 128
steps_per_epoch = 0
learning_rate = 1e-4
lr_epoch_decay = 0.98
n_classes = 2
cardinality = 5
n_obj_features = 20
class g_config(object):
    n_layers = 4
nodes = [1024, 1024, 1024, 512]
keep_prob = 0.95
class f_config(object):
    n_layers = 4
nodes = [1024, 1024, 512, 256]
keep_prob = 0.95
class directories(object):
data = 'data'
tensorboard = 'tensorboard'
checkpoints = 'checkpoints'
class reader():
# Iterates over data and returns batches
def __init__(self, df):
self.df = df
self.batch_size = config.batch_size
self.steps_per_epoch = len(df) // config.batch_size
self.epochs = 0
self.proceed = True
self.shuffle()
def shuffle(self):
self.df = self.df.sample(frac=1).reset_index(drop=True)
self.df_X = self.df.drop('Labels', axis = 1)
self.df_y = self.df['Labels']
self.pointer = 0
    def next_batch(self, batch_size):
        if self.pointer + 1 >= self.steps_per_epoch:
            # Final (possibly partial) batch of the epoch: take the tail, then reshuffle
            inputs = self.df_X.iloc[self.pointer*batch_size:]
            targets = self.df_y.iloc[self.pointer*batch_size:]
            self.epochs += 1
            self.shuffle()
            self.proceed = False
        else:
            inputs = self.df_X.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
            targets = self.df_y.iloc[self.pointer*batch_size:(self.pointer+1)*batch_size]
            self.pointer += 1
        return inputs, targets
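# Usage sketch (illustrative only; `df_train` is any DataFrame with a 'Labels' column):
# r = reader(df_train)
# while r.proceed:
#     x_batch, y_batch = r.next_batch(config.batch_size)
# The final call returns the remaining partial batch, reshuffles, and sets proceed = False.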
In [2]:
def load_data(file_name, test_size = 0.05):
from sklearn.model_selection import train_test_split
df = pd.read_hdf(file_name, 'df')
df_X_train, df_X_test, df_y_train, df_y_test = train_test_split(df.drop('Labels', axis = 1),
df['Labels'], test_size = test_size, random_state=42)
return df_X_train, df_X_test, df_y_train, df_y_test
def save_summary(config, delta_t, train_acc, test_acc, test_auc):
summary = {
'Channel': config.channel,
'Mode': config.mode,
'Timestamp': time.strftime('%c'),
        'g_nodes': g_config.nodes,
        'f_nodes': f_config.nodes,
'Batch_size': config.batch_size,
'Dropout': config.keep_prob,
'Epochs': config.num_epochs,
'Time': delta_t,
'Final train acc': train_acc,
'Final test acc': test_acc,
'Final test AUC': test_auc
}
# Writing JSON data
if os.path.isfile('vdnn_summary.json'):
        with open('vdnn_summary.json', 'r+') as f:
new = json.load(f)
new.append(summary)
with open('vdnn_summary.json', 'w') as f:
json.dump(new, f, indent = 4)
else:
with open('vdnn_summary.json', 'w') as f:
json.dump([summary], f, indent = 4)
def layer_weights(shape):
# Return weight tensor of given shape using Xavier initialization
W = tf.get_variable("weights", shape = shape, initializer=tf.contrib.layers.xavier_initializer())
return W
def layer_biases(shape):
# Return bias tensor of given shape with small initialized constant value
b = tf.get_variable("biases", shape = shape, initializer = tf.constant_initializer(0.001))
return b
def hidden_layer_ops(x, shape, name, keep_prob, activation=tf.nn.relu):
# Add operations to graph to construct hidden layers
with tf.variable_scope(name) as scope:
# scope.reuse_variables() # otherwise tf.get_variable() checks that already existing vars are not shared by accident
weights = layer_weights(shape = shape)
biases = layer_biases(shape = [shape[1]])
# Apply non-linearity. Default is ReLU
actv = activation(tf.matmul(x, weights) + biases)
layer_output = tf.nn.dropout(actv, keep_prob)
return layer_output
def readout_ops(x, shape, name):
# Don't apply non-linearity, dropout on output layer
with tf.variable_scope(name) as scope:
weights = layer_weights(shape = shape)
biases = layer_biases(shape = [shape[1]])
layer_output = tf.matmul(x, weights) + biases
return layer_output
def BN_layer_ops(x, shape, name, keep_prob, phase, activation=tf.nn.relu):
with tf.variable_scope(name) as scope:
# scope.reuse_variables() # otherwise tf.get_variable() checks that already existing vars are not shared by accident
weights = layer_weights(shape = shape)
biases = layer_biases(shape = [shape[1]])
z_BN = tf.matmul(x, weights) + biases
# Place BN transform before non-linearity - update to TF 1.2!
theta_BN = tf.contrib.layers.batch_norm(z_BN, center=True, scale=True, is_training=phase,
decay=0.95, zero_debias_moving_mean=True, scope='bn', fused = True)
BN_actv = activation(theta_BN)
BN_layer_output = tf.nn.dropout(BN_actv, keep_prob)
return BN_layer_output
def build_network(x, n_layers, hidden_layer_nodes, training_phase):
assert n_layers == len(hidden_layer_nodes), 'Specified layer nodes and number of layers do not correspond.'
layers = [x]
with tf.variable_scope('BN_layers') as scope:
hidden_1 = BN_layer_ops(x, shape = [config.n_features, hidden_layer_nodes[0]], name = 'BNhidden0',
keep_prob = config.keep_prob, phase = training_phase)
layers.append(hidden_1)
        for n in range(1, n_layers):
            hidden_n = BN_layer_ops(layers[-1], shape = [hidden_layer_nodes[n-1], hidden_layer_nodes[n]], name = 'BNhidden{}'.format(n),
                keep_prob = config.keep_prob, phase = training_phase)
layers.append(hidden_n)
readout = readout_ops(layers[-1], shape = [hidden_layer_nodes[-1], config.n_classes], name = 'readout')
layers.append(readout)
return layers
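# Example wiring (a sketch; build_network is not used by the relational model below,
# which composes g_network/f_network instead). Shapes assume config.n_features inputs:
# layers = build_network(x, n_layers = 4, hidden_layer_nodes = [1024, 1024, 512, 256],
#                        training_phase = training_phase)
# logits = layers[-1]  # shape [batch_size, config.n_classes]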
def plot_ROC_curve(y_true, y_pred, meta = ''):
    import matplotlib.pyplot as plt
    from sklearn.metrics import roc_curve, auc
# Compute ROC curve, integrate
fpr, tpr, thresholds = roc_curve(y_true, y_pred)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.axes([.1,.1,.8,.7])
plt.figtext(.5,.9, r'$\mathrm{Receiver \;Operating \;Characteristic}$', fontsize=15, ha='center')
plt.figtext(.5,.85, meta, fontsize=10,ha='center')
plt.plot(fpr, tpr, color='darkorange',
lw=2, label='ROC (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=1.0, linestyle='--')
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel(r'$\mathrm{False \;Positive \;Rate}$')
plt.ylabel(r'$\mathrm{True \;Positive \;Rate}$')
plt.legend(loc="lower right")
    plt.savefig(os.path.join('graphs', '{}_{}_ROC.pdf'.format(config.channel, config.mode)), format='pdf', dpi=1000)
plt.show()
plt.gcf().clear()
def g_network(x, training_phase, i, j):
    # One g instance per (i, j) object pair; both indices go into the scope name
    # so tf.get_variable does not collide across pairs
    g_hidden_1 = BN_layer_ops(x, shape = [x.get_shape()[1], g_config.nodes[0]], name = 'g_{}{}_hidden1'.format(i, j),
        keep_prob = g_config.keep_prob, phase = training_phase)
    g_hidden_2 = BN_layer_ops(g_hidden_1, shape = [g_config.nodes[0], g_config.nodes[1]], name = 'g_{}{}_hidden2'.format(i, j),
        keep_prob = g_config.keep_prob, phase = training_phase)
    g_hidden_3 = BN_layer_ops(g_hidden_2, shape = [g_config.nodes[1], g_config.nodes[2]], name = 'g_{}{}_hidden3'.format(i, j),
        keep_prob = g_config.keep_prob, phase = training_phase)
    g_hidden_4 = BN_layer_ops(g_hidden_3, shape = [g_config.nodes[2], g_config.nodes[3]], name = 'g_{}{}_hidden4'.format(i, j),
        keep_prob = g_config.keep_prob, phase = training_phase)
    return g_hidden_4
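# Shape note: g_network consumes a concatenated object pair, so its input width is
# 2 * config.n_obj_features and its output width is g_config.nodes[-1]. Because each
# (i, j) pair gets its own variable scope, parameters here are per-pair rather than
# shared across all pairs as for g_theta in the paper.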
def f_network(x, training_phase):
f_hidden_1 = BN_layer_ops(x, shape = [x.get_shape()[1], f_config.nodes[0]], name = 'f_hidden1',
keep_prob = f_config.keep_prob, phase = training_phase)
f_hidden_2 = BN_layer_ops(f_hidden_1, shape = [f_config.nodes[0], f_config.nodes[1]], name = 'f_hidden2',
keep_prob = f_config.keep_prob, phase = training_phase)
f_hidden_3 = BN_layer_ops(f_hidden_2, shape = [f_config.nodes[1], f_config.nodes[2]], name = 'f_hidden3',
keep_prob = f_config.keep_prob, phase = training_phase)
f_hidden_4 = BN_layer_ops(f_hidden_3, shape = [f_config.nodes[2], f_config.nodes[3]], name = 'f_hidden4',
keep_prob = f_config.keep_prob, phase = training_phase)
f_readout = readout_ops(f_hidden_4, shape = [f_config.nodes[3], config.n_classes], name = 'f_readout')
return f_readout
In [3]:
test_file = '/data/rnn/rnn_B02rho0gamma_kst.h5'
df_X_train, df_X_test, df_y_train, df_y_test = load_data(test_file)
df_y_train = df_y_train.astype(np.int8)
df_y_test = df_y_test.astype(np.int8)
df_train = pd.concat([df_X_train, df_y_train], axis = 1)
df_test = pd.concat([df_X_test, df_y_test], axis = 1)
config.n_features = df_train.shape[1]-1
config.steps_per_epoch = df_train.shape[0] // config.batch_size
assert config.n_features % config.cardinality == 0, 'Number of features does not divide evenly between objects'
config.n_obj_features = config.n_features // config.cardinality
architecture = 'RN | g: {} | f: {}'.format(g_config.nodes, f_config.nodes)  # used in plots/summaries
readerTrain = reader(df_train)
readerTest = reader(df_test)
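# Sanity check (a minimal sketch): mirror the tf.reshape in relationalNN.__init__ with numpy
# to confirm the flat feature vector splits cleanly into `cardinality` objects.
flat = df_X_train.values[:2]  # two events, shape (2, n_features)
objs = flat.reshape(-1, config.cardinality, config.n_obj_features)
assert objs.shape == (2, config.cardinality, config.n_obj_features)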
In [4]:
class relationalNN():
# Builds the computational graph
# Object set = {B, rho0, pi+, pi-, gamma}
def __init__(self, config, training = True):
self.config = config
self.x = tf.placeholder(tf.float32, shape = [None, self.config.n_features])
self.y_true = tf.placeholder(tf.int32, shape = None)
self.keep_prob = tf.placeholder(tf.float32)
self.training_phase = tf.placeholder(tf.bool)
self.objects = tf.reshape(self.x, [-1, self.config.cardinality, self.config.n_obj_features])
batch_size = tf.shape(self.x)[0]
self.g = tf.zeros([batch_size, g_config.nodes[-1]])
#with tf.variable_scope('test2'):
"""g"""
for i in range(self.config.cardinality):
oi = self.objects[:,i]
for j in range(self.config.cardinality):
                if i == j:
                    continue  # skip self-pairs; g is summed over distinct ordered pairs
oj = self.objects[:,j]
x_ = tf.concat([oi, oj], axis = 1)
g_out = g_network(x_, self.training_phase, i, j)
self.g = tf.add(self.g, g_out)
"""f"""
self.f = f_network(self.g, self.training_phase)
        self.prediction = tf.nn.softmax(self.f)
        self.classification = tf.argmax(self.f, 1)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = self.f, labels = self.y_true))
tf.summary.scalar('cross_entropy', self.cross_entropy)
# Anneal learning rate
self.global_step = tf.Variable(0, trainable=False)
self.beta = tf.train.exponential_decay(config.learning_rate, self.global_step,
decay_steps = config.steps_per_epoch, decay_rate = config.lr_epoch_decay, staircase=True)
with tf.control_dependencies(update_ops):
# Ensures that we execute the update_ops before performing the train_step
self.train_op = tf.train.AdamOptimizer(self.beta).minimize(self.cross_entropy, name = 'optimizer',
global_step = self.global_step)
# Evaluation metrics
correct_prediction = tf.equal(tf.cast(tf.argmax(self.f, 1), tf.int32), self.y_true)
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', self.accuracy)
        _, self.auc_op = tf.metrics.auc(predictions = self.prediction[:,1], labels = self.y_true, num_thresholds = 256)
tf.summary.scalar('auc', self.auc_op)
self.merge_op = tf.summary.merge_all()
self.train_writer = tf.summary.FileWriter(
os.path.join(directories.tensorboard, 'train_{}'.format(time.strftime('%d-%m_%I:%M'))), graph = tf.get_default_graph())
self.test_writer = tf.summary.FileWriter(
os.path.join(directories.tensorboard, 'test_{}'.format(time.strftime('%d-%m_%I:%M'))))
def predict(self, ckpt, metaGraph = None):
saver = tf.train.Saver()
with tf.Session() as sess:
# Initialize variables
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            sess.run(tf.local_variables_initializer())  # required by the streaming AUC metric
start_time = time.time()
            assert metaGraph or (ckpt and ckpt.model_checkpoint_path), 'Missing checkpoint file!'
if metaGraph:
saver = tf.train.import_meta_graph(metaGraph)
saver.restore(sess, os.path.splitext(metaGraph)[0])
print('{} restored.'.format(metaGraph))
else:
saver.restore(sess, ckpt.model_checkpoint_path)
print('{} restored.'.format(ckpt.model_checkpoint_path))
# Make predictions using the trained model
feed_dict_test = {self.x: df_X_test.values, self.y_true: df_y_test.values, self.keep_prob: 1.0, self.training_phase: False}
            network_output_test, class_test, final_v_acc, final_v_auc = sess.run(
                [self.prediction, self.classification, self.accuracy, self.auc_op], feed_dict = feed_dict_test)
            print("Validation accuracy: {:g}\nValidation AUC: {:g}".format(final_v_acc, final_v_auc))
            plot_ROC_curve(y_pred = network_output_test[:,1], y_true = df_y_test.values, meta = architecture)
delta_t = time.time() - start_time
print("Inference complete. Duration: %g s" %(delta_t))
return network_output_test
In [ ]:
def train(config, restore = False):
# Executes training operations
relNN = relationalNN(config, training = True)
start_time = time.time()
v_acc_best = 0.
saver = tf.train.Saver()
ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
with tf.Session() as sess:
# Initialize variables
init_op = tf.global_variables_initializer()
sess.run(init_op)
        if restore and ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print('{} restored.'.format(ckpt.model_checkpoint_path))
for epoch in range(config.num_epochs):
readerTrain.proceed = True
step = 0
# Save every 10 epochs
if epoch % 10 == 0:
save_path = saver.save(sess,
os.path.join(directories.checkpoints,
'relNN_{}_{}_epoch{}.ckpt'.format(config.mode, config.channel, epoch)),
global_step = epoch)
print('Graph saved to file: {}'.format(save_path))
print('(*) Entering Epoch {} ({:.3f} s)'.format(epoch, time.time() - start_time))
while(readerTrain.proceed):
# Iterate through entire corpus
x_train, y_train = readerTrain.next_batch(config.batch_size)
feed_dict_train = {relNN.x: x_train.values, relNN.y_true: y_train.values,
relNN.keep_prob: config.keep_prob, relNN.training_phase: True}#, relNN.eta: eta}
t_op = sess.run(relNN.train_op, feed_dict = feed_dict_train)
step += 1
if step % (config.steps_per_epoch // 20) == 0:
# Evaluate model
improved = ''
sess.run(tf.local_variables_initializer())
x_test, y_test = readerTest.next_batch(config.batch_size)
feed_dict_test = {relNN.x: x_test.values, relNN.y_true: y_test.values, relNN.keep_prob: 1.0,
relNN.training_phase: False}#, relNN.eta: eta}
t_acc, t_loss, t_summary = sess.run([relNN.accuracy, relNN.cross_entropy, relNN.merge_op],
feed_dict = feed_dict_train)
                    v_acc, v_loss, v_auc, v_summary = sess.run([relNN.accuracy, relNN.cross_entropy, relNN.auc_op, relNN.merge_op],
feed_dict = feed_dict_test)
relNN.train_writer.add_summary(t_summary, step)
relNN.test_writer.add_summary(v_summary, step)
if epoch > 5 and v_acc > v_acc_best:
v_acc_best = v_acc
improved = '[*]'
save_path = saver.save(sess,
os.path.join(directories.checkpoints,
'relNN_{}_{}_best.ckpt'.format(config.mode, config.channel)),
global_step = epoch)
print('Epoch {}, Step {} | Training Acc: {:.3f} | Test Acc: {:.3f} | Test Loss: {:.3f} | Test AUC {:.3f} ({:.2f} s) {}'
.format(epoch, step, t_acc, v_acc, v_loss, v_auc, time.time() - start_time, improved))
save_path = saver.save(sess, os.path.join(directories.checkpoints, 'relNN_{}_{}_end.ckpt'.format(config.mode, config.channel)),
global_step = epoch)
print('Model saved to file: {}'.format(save_path))
feed_dict_train = {relNN.x: df_X_train.values, relNN.y_true: df_y_train.values, relNN.keep_prob: 1.0, relNN.training_phase: False}
feed_dict_test = {relNN.x: df_X_test.values, relNN.y_true: df_y_test.values, relNN.keep_prob: 1.0, relNN.training_phase: False}
final_t_acc = relNN.accuracy.eval(feed_dict = feed_dict_train)
final_v_acc, final_v_AUC = sess.run([relNN.accuracy, relNN.auc_op], feed_dict = feed_dict_test)
delta_t = time.time() - start_time
print("Training Complete. Time elapsed: {:.3f} s".format(delta_t))
print("Train accuracy: {:g}\nValidation accuracy: {:g}\nValidation AUC: {:g}".format(final_t_acc, final_v_acc, final_v_AUC))
print('Architecture: {}'.format(architecture))
save_summary(config, delta_t, final_t_acc, final_v_acc, final_v_AUC)
In [ ]:
train(config)#, restore = True)
In [ ]:
ckpt = tf.train.get_checkpoint_state(directories.checkpoints)
tf.reset_default_graph()  # fresh graph so variable scopes don't collide with the training graph
relNN = relationalNN(config, training = False)
network_output = relNN.predict(ckpt)
np.save(os.path.join(directories.checkpoints, '{}_{}_y_pred.npy'.format(config.channel, config.mode)), network_output)
np.save(os.path.join(directories.checkpoints, '{}_{}_y_test.npy'.format(config.channel, config.mode)), df_y_test.values)
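In [ ]:
# Offline check (a minimal sketch): reload the saved arrays and recompute the test AUC
# from the class-1 softmax probabilities, independent of the TF graph.
from sklearn.metrics import roc_auc_score
y_pred = np.load(os.path.join(directories.checkpoints, '{}_{}_y_pred.npy'.format(config.channel, config.mode)))
y_test = np.load(os.path.join(directories.checkpoints, '{}_{}_y_test.npy'.format(config.channel, config.mode)))
print('Offline AUC: {:.3f}'.format(roc_auc_score(y_test, y_pred[:, 1])))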