In [1]:
import tensorflow as tf
import numpy as np
import re
import collections
import sklearn.metrics as sk
from helper_functions_wsj import *
from glob import glob
from reader import Reader
import time
%load_ext autoreload
%autoreload 2
In [2]:
# Build the WSJ reader and unpack the six arrays returned by get_data:
# inputs, targets and masks for the train and test portions.
print('Loading WSJ Data')
reader = Reader(split=0.9)
wsj_arrays = reader.get_data(glob('./data/WSJ/*/*.POS'))
X_train, Y_train, mask_train, X_test, Y_test, mask_test = wsj_arrays
print('Loaded WSJ Data')
In [3]:
# Hyperparameters and sizes taken from the reader (plain Python values).
batch_size = 32
hidden_size = 128
num_layers = 3
vocab_size = len(reader.word_to_id)
tag_size = len(reader.tag_to_id)
maxlen = reader.maxlen

graph = tf.Graph()
with graph.as_default():
    # Placeholders are [batch, maxlen]: word ids, tag ids, and a boolean mask
    # selecting the positions that count towards the loss and the error rate.
    input_data = tf.placeholder(tf.int64, [None, maxlen])
    targets = tf.placeholder(tf.int64, [None, maxlen])
    mask = tf.placeholder(tf.bool, [None, maxlen])

    # The same LSTMCell object is repeated for every layer of both directions.
    lstm_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True)
    cell_fw = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell] * num_layers, state_is_tuple=True)
    cell_bw = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell] * num_layers, state_is_tuple=True)

    # The batch dimension is only known at run time, so read it from the input.
    runtime_batch = tf.shape(input_data)[0]
    initial_state_fw = cell_fw.zero_state(runtime_batch, tf.float32)
    initial_state_bw = cell_bw.zero_state(runtime_batch, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, hidden_size])
        inputs = tf.nn.embedding_lookup(embedding, input_data)

    # bidirectional_rnn takes one tensor per time step: go time-major, unpack.
    inputs = list(tf.unpack(tf.transpose(inputs, [1, 0, 2])))
    outputs, _, _ = tf.nn.bidirectional_rnn(
        cell_fw, cell_bw, inputs,
        initial_state_fw=initial_state_fw,
        initial_state_bw=initial_state_bw)

    # output from forward and backward cells, one row per (sentence, position).
    output = tf.reshape(tf.concat(1, outputs), [-1, 2 * hidden_size])
    softmax_w = tf.get_variable("softmax_w", [2 * hidden_size, tag_size])
    softmax_b = tf.get_variable("softmax_b", [tag_size])
    logits = tf.matmul(output, softmax_w) + softmax_b

    flat_targets = tf.reshape(targets, [-1])
    flat_mask = tf.reshape(mask, [-1])

    # NOTE(review): the fourth positional argument (tag_size) is interpreted
    # according to the installed TensorFlow's signature for this function --
    # confirm it still means what was intended.
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [tf.cast(flat_mask, tf.float32)], tag_size)
    cost = tf.reduce_sum(loss) / batch_size

    # Misclassification rate over the positions selected by the mask.
    equality = tf.equal(tf.argmax(logits, 1), tf.cast(flat_targets, tf.int64))
    masked = tf.boolean_mask(equality, flat_mask)
    misclass = 1 - tf.reduce_mean(tf.cast(masked, tf.float32))

    # Plain SGD with global-norm gradient clipping; lr is set via assign_lr.
    lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5.0)
    optimizer = tf.train.GradientDescentOptimizer(lr)
    train_op = optimizer.apply_gradients(zip(grads, tvars))
def assign_lr(session, lr_value):
    """Set the learning-rate variable ``lr`` to ``lr_value`` using ``session``.

    The assign op and the placeholder that feeds it are built once and reused.
    Creating a fresh ``tf.assign`` on every call (as before) added a new op to
    the graph each epoch.

    Args:
        session: session in which the update is run.
        lr_value: new value for the module-level variable ``lr``.
    """
    cached = getattr(assign_lr, "_update", None)
    # Rebuild when nothing is cached yet or `lr` now refers to another variable.
    if cached is None or cached[0] is not lr:
        with lr.graph.as_default():
            new_lr = tf.placeholder(lr.dtype.base_dtype, shape=[], name="new_lr")
            cached = (lr, new_lr, tf.assign(lr, new_lr))
        assign_lr._update = cached
    _, new_lr, lr_update = cached
    session.run(lr_update, feed_dict={new_lr: lr_value})
def run_epoch(x_data, y_data, data_mask, eval_op, training=True, verbose=False):
    """Runs the model on the given data.

    Batches come from ``Reader.iterator(x_data, y_data, data_mask, batch_size)``
    and are evaluated with the module-level ``sess``, fetching ``cost`` and
    ``misclass`` (plus ``eval_op`` when ``training is True``).

    Args:
        x_data, y_data, data_mask: arrays handed to ``Reader.iterator``.
        eval_op: extra op to run on every batch; only used when ``training``
            is ``True``.
        training: whether ``eval_op`` is run; also selects the log prefix.
        verbose: print progress roughly ten times per epoch.

    Returns:
        ``(np.exp(costs / iters), mean of the per-batch misclass values)``.

    Raises:
        ValueError: if the iterator yields no batches.
    """
    epoch_size = ((len(x_data) // batch_size) - 1)
    # With fewer than ten batches `epoch_size // 10` is 0, which used to make
    # the modulo below raise ZeroDivisionError; log every step in that case.
    log_every = max(epoch_size // 10, 1)
    progress_denominator = max(epoch_size, 1)
    start_time = time.time()
    costs = 0.0
    iters = 0
    misclass_ = []
    fetches = [cost, misclass, eval_op] if training is True else [cost, misclass]
    batches = Reader.iterator(x_data, y_data, data_mask, batch_size)
    # The per-batch mask gets its own name so the argument is not shadowed.
    for step, (x, y, batch_mask) in enumerate(batches):
        results = sess.run(fetches,
                           {input_data: x, targets: y, mask: batch_mask})
        l, misclassifications = results[0], results[1]
        costs += l
        # NOTE(review): `iters` grows by batch_size per batch, so the value
        # labelled "perplexity" is exp(sum of batch costs / examples seen),
        # not a per-token figure -- confirm this normalisation is intended.
        iters += batch_size
        if verbose and step % log_every == 0:
            # Guard against a zero interval from a coarse clock on the first step.
            elapsed = max(time.time() - start_time, 1e-12)
            print("[%s] %.3f perplexity: %.3f misclass:%.3f speed: %.0f wps" %
                  ('train' if training else 'test',
                   step * 1.0 / progress_denominator,
                   np.exp(costs / iters), misclassifications,
                   iters * batch_size / elapsed))
        misclass_.append(misclassifications)
    if not misclass_:
        raise ValueError("run_epoch received no batches to evaluate")
    return np.exp(costs / iters), np.mean(misclass_)
In [4]:
# Open an interactive session on the model graph and initialise its variables.
sess = tf.InteractiveSession(graph=graph)
init_op = tf.initialize_all_variables()
sess.run(init_op)
print('Initialized')
# Saver for the model; max_to_keep=1 retains only the newest checkpoint.
saver = tf.train.Saver(max_to_keep=1)
In [17]:
# Session cleanup is intentionally NOT executed at this point. This cell sits
# before the training, restore and evaluation cells, all of which call into
# `sess`, so closing the session here makes a top-to-bottom run fail.
# Run `sess.close()` by hand once the last evaluation cell has finished.
In [5]:
# Train for ten epochs: learning rate 1.0 for the first five, halved every
# epoch after that. A checkpoint is written whenever the test
# misclassification rate improves on the best seen so far.
best_misclass = 1.0
# Built once up front: creating tf.no_op() inside the loop added a new,
# never-executed op to the graph on every epoch.
eval_noop = tf.no_op()
for i in range(10):
    lr_decay = 0.5 ** max(i - 4, 0.0)
    assign_lr(sess, 1.0 * lr_decay)
    print("Epoch: %d Learning rate: %.3f" % (i + 1, sess.run(lr)))
    train_perplexity, _ = run_epoch(X_train, Y_train, mask_train,
                                    train_op, verbose=True)
    _, misclassifications = run_epoch(X_test, Y_test, mask_test,
                                      eval_noop, training=False, verbose=True)
    if misclassifications < best_misclass:
        best_misclass = misclassifications
        saver.save(sess, './data/bid3rnn_tagger.ckpt', global_step=i)
        print('Saving')
In [6]:
# The training loop saves only when the test misclassification improves and the
# saver keeps a single checkpoint, so the newest checkpoint it wrote is the
# best one. Its step suffix is 9 only if the final epoch happened to be the
# best, so ask the saver rather than hard-coding it. The original path is kept
# as a fallback for kernels in which no training was run.
written_checkpoints = saver.last_checkpoints
if written_checkpoints:
    best_checkpoint = written_checkpoints[-1]
else:
    best_checkpoint = "./data/bid3rnn_tagger.ckpt-9"
saver.restore(sess, best_checkpoint)
print("Best model restored!")
In [7]:
def _softmax_confidence(selected_logits):
    """Build the confidence ops for the rows of ``selected_logits``.

    Returns:
        ``(softmax, max softmax probability, KL[p||u])`` where the last two
        keep a trailing dimension of size one (``keep_dims=True``).
    """
    probs = tf.nn.softmax(selected_logits)
    top_prob = tf.reduce_max(probs, reduction_indices=[1], keep_dims=True)
    # KL[p||u] = log(K) + sum_i p_i * log(p_i); the 1e-10 keeps log finite.
    kl_from_uniform = tf.log(len(reader.tag_to_id)*1.) + tf.reduce_sum(
        probs * tf.log(tf.abs(probs) + 1e-10),
        reduction_indices=[1], keep_dims=True)
    return probs, top_prob, kl_from_uniform


# Pin construction to the model graph so the Python constants used above are
# created in the same graph as `logits`, whatever the current default graph is.
with graph.as_default():
    # Keep only the positions selected by the mask.
    smothered_logits = tf.boolean_mask(logits, tf.reshape(mask, [-1]))
    smothered_targets = tf.reshape(tf.boolean_mask(targets, mask), [-1])
    predicted_tags = tf.argmax(smothered_logits, 1)

    # All masked positions.
    s, s_prob, kl_all = _softmax_confidence(smothered_logits)
    m_all, v_all = tf.nn.moments(kl_all, axes=[0])

    # Positions whose arg-max prediction matches the target.
    logits_right = tf.boolean_mask(
        smothered_logits, tf.equal(predicted_tags, smothered_targets))
    s_right, s_right_prob, kl_right = _softmax_confidence(logits_right)
    m_right, v_right = tf.nn.moments(kl_right, axes=[0])

    # Positions whose arg-max prediction differs from the target.
    logits_wrong = tf.boolean_mask(
        smothered_logits, tf.not_equal(predicted_tags, smothered_targets))
    s_wrong, s_wrong_prob, kl_wrong = _softmax_confidence(logits_wrong)
    m_wrong, v_wrong = tf.nn.moments(kl_wrong, axes=[0])
In [8]:
def _print_detection_scores(title, safe, risky, risky_is_positive=False):
    """Print AUPR and AUROC for telling ``safe`` scores apart from ``risky`` ones.

    Args:
        title: line printed before the two metrics.
        safe, risky: score arrays, stacked in that order before scoring.
        risky_is_positive: label the ``risky`` rows 1 instead of the ``safe``
            rows; callers negate the scores themselves for that direction.
    """
    print(title)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    if risky_is_positive:
        labels[safe.shape[0]:] += 1
    else:
        labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))


# Evaluate the whole test set in one run: error rate (in percent) plus the
# KL and max-probability scores for all / right / wrong predictions.
err, kl_a, kl_r, kl_w, s_p, s_rp, s_wp = sess.run(
    [100*misclass, kl_all, kl_right, kl_wrong, s_prob, s_right_prob, s_wrong_prob],
    feed_dict={input_data: X_test, targets: Y_test, mask: mask_test})

print('WSJ Error (%)| Prediction Prob (mean, std) | PProb Right (mean, std) | PProb Wrong (mean, std):')
print(err, '|', np.mean(s_p), np.std(s_p), '|', np.mean(s_rp), np.std(s_rp), '|', np.mean(s_wp), np.std(s_wp))

# Correct predictions are the positive class.
print('\nSuccess Detection')
print('Success base rate (%):', round(100-err,2))
_print_detection_scores('KL[p||u]: Right/Wrong classification distinction',
                        kl_r, kl_w)
_print_detection_scores('Prediction Prob: Right/Wrong classification distinction',
                        s_rp, s_wp)

# Wrong predictions are the positive class; scores are negated so that a
# higher value means "more likely an error".
print('\nError Detection')
print('Error base rate (%):', round(err,2))
_print_detection_scores('KL[p||u]: Right/Wrong classification distinction',
                        -kl_r, -kl_w, risky_is_positive=True)
_print_detection_scores('Prediction Prob: Right/Wrong classification distinction',
                        -s_rp, -s_wp, risky_is_positive=True)
In [12]:
def show_ood_detection_results(error_rate_for_in, in_examples, out_examples, out_mask):
    """Print out-of-distribution detection metrics for ``out_examples``.

    Runs ``kl_all`` and ``s_prob`` on the out-of-distribution inputs and
    compares them with the in-distribution scores held in the module-level
    arrays ``kl_a``, ``s_p`` (all positions) and ``kl_r``, ``s_rp`` (correctly
    classified positions), which must already exist when this is called.

    Args:
        error_rate_for_in: in-distribution error rate in percent. It is now
            actually used for the "relative to correct examples" base rates;
            previously the global ``err`` was read and this argument ignored.
        in_examples: in-distribution inputs; only ``shape[0]`` is used.
        out_examples: out-of-distribution inputs fed to the model.
        out_mask: boolean mask fed alongside ``out_examples``.
    """
    def _report(title, in_scores, out_scores, out_is_positive=False):
        # One AUPR/AUROC pair. Callers pass negated scores when the
        # out-of-distribution rows are the positive class.
        print(title)
        labels = np.zeros((in_scores.shape[0] + out_scores.shape[0]), dtype=np.int32)
        if out_is_positive:
            labels[in_scores.shape[0]:] += 1
        else:
            labels[:in_scores.shape[0]] += 1
        examples = np.squeeze(np.vstack((in_scores, out_scores)))
        print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
        print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))

    kl_oos, s_p_oos = sess.run([kl_all, s_prob], feed_dict={input_data: out_examples, mask: out_mask})
    print('OOD Example Prediction Probability (mean, std):')
    print(np.mean(s_p_oos), np.std(s_p_oos))

    # NOTE(review): the base rates below count rows of the example arrays,
    # while the metric labels count rows of the score arrays -- confirm both
    # are meant to be in the same units.
    print('\nNormality Detection')
    print('Normality base rate (%):', round(100*in_examples.shape[0]/(
        out_examples.shape[0] + in_examples.shape[0]),2))
    _report('KL[p||u]: Normality Detection', kl_a, kl_oos)
    _report('Prediction Prob: Normality Detection', s_p, s_p_oos)

    print('Normality base rate (%):', round(100*(1 - error_rate_for_in/100)*in_examples.shape[0]/
          (out_examples.shape[0] + (1 - error_rate_for_in/100)*in_examples.shape[0]),2))
    _report('KL[p||u]: Normality Detection (relative to correct examples)',
            kl_r, kl_oos)
    _report('Prediction Prob: Normality Detection (relative to correct examples)',
            s_rp, s_p_oos)

    print('\n\nAbnormality Detection')
    print('Abnormality base rate (%):', round(100*out_examples.shape[0]/(
        out_examples.shape[0] + in_examples.shape[0]),2))
    _report('KL[p||u]: Abnormality Detection',
            -kl_a, -kl_oos, out_is_positive=True)
    _report('Prediction Prob: Abnormality Detection',
            -s_p, -s_p_oos, out_is_positive=True)

    print('Abnormality base rate (%):', round(100*out_examples.shape[0]/
          (out_examples.shape[0] + (1 - error_rate_for_in/100)*in_examples.shape[0]),2))
    _report('KL[p||u]: Abnormality Detection (relative to correct examples)',
            -kl_r, -kl_oos, out_is_positive=True)
    _report('Prediction Prob: Abnormality Detection (relative to correct examples)',
            -s_rp, -s_p_oos, out_is_positive=True)
In [10]:
# Display the tag-to-id mapping to look up the START, END, and PAD symbol ids.
# They are 0, 15, and 16 in this run; the cells below hard-code those values.
reader.tag_to_id
In [11]:
def mask_for_data(_dataset, to_ignore=(0, 15, 16)):
    """Return a boolean mask that is False wherever ``_dataset`` holds an ignored id.

    Args:
        _dataset: NumPy array of ids.
        to_ignore: ids to mask out. The default is the START/END/PAD triple
            noted for this run; pass the ids from ``reader.tag_to_id`` if
            they differ.

    Returns:
        Boolean array with the same shape as ``_dataset``.
    """
    # Built-in bool instead of the removed `np.bool` alias.
    _mask = np.ones(_dataset.shape, dtype=bool)
    for tag_to_ignore in to_ignore:
        _mask = np.logical_and(_mask, _dataset != tag_to_ignore)
    return _mask
vocab = reader.word_to_id.keys()

# we replace <s> with </s> since it has no embedding, and </s> is a better embedding than UNK
_special_tags = dict(start_tag=0, end_tag=15, pad_tag=16)
xt, yt = data_to_mat(
    './data/Tweets/tweets-train.txt', vocab, reader.word_to_id, **_special_tags)
xdev, ydev = data_to_mat(
    './data/Tweets/tweets-dev.txt', vocab, reader.word_to_id, **_special_tags)
xdtest, ydtest = data_to_mat(
    './data/Tweets/tweets-devtest.txt', vocab, reader.word_to_id, **_special_tags)

# Inputs, tags and the matching mask for each of the three tweet splits.
tweets = dict(
    x_train=xt, y_train=yt, train_mask=mask_for_data(yt),
    x_dev=xdev, y_dev=ydev, dev_mask=mask_for_data(ydev),
    x_devtest=xdtest, y_devtest=ydtest, devtest_mask=mask_for_data(ydtest),
)
In [13]:
# WSJ test sentences are in-distribution; the tweet dev-test split is OOD.
print('Twitter OOD Detection\n')
show_ood_detection_results(
    error_rate_for_in=err,
    in_examples=X_test,
    out_examples=tweets['x_devtest'],
    out_mask=tweets['devtest_mask'])
In [15]:
# Weblog test file, converted with the same START/END/PAD ids as the tweets.
xtest, ytest = data_to_mat(
    './data/WebTreeBank/weblog_penntrees.test.conll',
    vocab,
    reader.word_to_id,
    is_not_twitter=True,
    start_tag=0,
    end_tag=15,
    pad_tag=16)
In [17]:
# WSJ test sentences are in-distribution; the weblog test set is OOD.
print('Webblog OOD Detection\n')
weblog_mask = mask_for_data(ytest)
show_ood_detection_results(err, X_test, xtest, weblog_mask)