In [1]:
import numpy as np
import tensorflow as tf
# %matplotlib inline
# import matplotlib.pylab as plt
from helper_functions_twitter import *
import sklearn.metrics as sk
%load_ext autoreload
%autoreload 2
In [4]:
window_size = 1
# note that we encode the tags with numbers for later convenience
tag_to_number = {
    u'N': 0, u'O': 1, u'S': 2, u'^': 3, u'Z': 4, u'L': 5, u'M': 6,
    u'V': 7, u'A': 8, u'R': 9, u'!': 10, u'D': 11, u'P': 12, u'&': 13, u'T': 14,
    u'X': 15, u'Y': 16, u'#': 17, u'@': 18, u'~': 19, u'U': 20, u'E': 21, u'$': 22,
    u',': 23, u'G': 24
}
embeddings = embeddings_to_dict('./data/Tweets/embeddings-twitter.txt')
vocab = embeddings.keys()
# we replace <s> with </s> since it has no embedding, and </s> is a better embedding than UNK
xt, yt = data_to_mat('./data/Tweets/tweets-train.txt', vocab, tag_to_number, window_size=window_size,
                     start_symbol=u'</s>')
xdev, ydev = data_to_mat('./data/Tweets/tweets-dev.txt', vocab, tag_to_number, window_size=window_size,
                         start_symbol=u'</s>')
xdtest, ydtest = data_to_mat('./data/Tweets/tweets-devtest.txt', vocab, tag_to_number, window_size=window_size,
                             start_symbol=u'</s>')

data = {
    'x_train': xt, 'y_train': yt,
    'x_dev': xdev, 'y_dev': ydev,
    'x_devtest': xdtest, 'y_devtest': ydtest
}
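A quick shape check can catch data-loading mistakes early. The sketch below is not part of the original notebook; it assumes data_to_mat returns a 2-D array of token strings with 2*window_size + 1 columns (center word plus context) and a vector of integer tag indices, which is how the matrices are used in the later cells.
In [ ]:
# Hedged sanity check (assumption: x_* rows hold 2*window_size + 1 token strings,
# y_* entries are tag indices in [0, len(tag_to_number))).
print(data['x_train'].shape, data['y_train'].shape)
assert data['x_train'].shape[1] == 2 * window_size + 1
assert data['y_train'].min() >= 0 and data['y_train'].max() < len(tag_to_number)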
In [5]:
# hyperparameters and TF graph construction
num_epochs = 30
num_tags = 25
hidden_size = 256
batch_size = 64
embedding_dimension = 50
example_size = (2*window_size + 1)*embedding_dimension
init_lr = 0.001
num_examples = data['y_train'].shape[0]
num_batches = num_examples//batch_size
graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, [None, example_size])
    y = tf.placeholder(tf.int64, [None])

    # column-normalized Gaussian initialization, rescaled for the GELU nonlinearity
    w1 = tf.Variable(tf.nn.l2_normalize(tf.random_normal([example_size, hidden_size]), 0)/tf.sqrt(1 + 0.425))
    b1 = tf.Variable(tf.zeros([hidden_size]))
    w2 = tf.Variable(tf.nn.l2_normalize(tf.random_normal([hidden_size, hidden_size]), 0)/tf.sqrt(0.425 + 0.425))
    b2 = tf.Variable(tf.zeros([hidden_size]))
    w_out = tf.Variable(tf.nn.l2_normalize(tf.random_normal([hidden_size, num_tags]), 0)/tf.sqrt(0.425 + 1))
    b_out = tf.Variable(tf.zeros([num_tags]))

    def gelu_fast(_x):
        # tanh approximation of the GELU nonlinearity
        return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))

    def model(data_feed):
        h1 = gelu_fast(tf.matmul(data_feed, w1) + b1)
        h2 = gelu_fast(tf.matmul(h1, w2) + b2)
        return tf.matmul(h2, w_out) + b_out

    logits = model(x)
    # named arguments for compatibility with TF >= 1.0
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    loss += 5e-5*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2))

    # learning rate annealing: drop the learning rate by 10x after 15 epochs
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(init_lr, global_step, 15*num_batches, 0.1, staircase=True)

    # pick optimizer
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

    acc = 100*tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), y), "float"))
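gelu_fast above is the tanh approximation of the GELU. As a sanity check (not part of the original notebook), it can be compared in numpy against the exact form 0.5*x*(1 + erf(x/sqrt(2))); the discrepancy should be small across a reasonable input range.
In [ ]:
# Hedged sanity check (assumption: scipy is available in the environment).
from scipy.special import erf
xs = np.linspace(-4, 4, 101)
gelu_exact = 0.5 * xs * (1 + erf(xs / np.sqrt(2)))
gelu_approx = 0.5 * xs * (1 + np.tanh(np.sqrt(2 / np.pi) * (xs + 0.044715 * xs**3)))
print(np.max(np.abs(gelu_exact - gelu_approx)))  # should be small (well below 1e-2)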
In [6]:
# initialize
sess = tf.InteractiveSession(graph=graph)
tf.initialize_all_variables().run()
print('Initialized')
# create a saver to checkpoint the best model during training
saver = tf.train.Saver(max_to_keep=1)
In [7]:
best_acc = 0
# train
for epoch in range(num_epochs):
    # shuffle data every epoch
    indices = np.arange(num_examples)
    np.random.shuffle(indices)
    data['x_train'] = data['x_train'][indices]
    data['y_train'] = data['y_train'][indices]

    for i in range(num_batches):
        offset = i * batch_size
        x_batch = word_list_to_embedding(data['x_train'][offset:offset + batch_size, :],
                                         embeddings, embedding_dimension)
        y_batch = data['y_train'][offset:offset + batch_size]

        _, l, batch_acc = sess.run([optimizer, loss, acc],
                                   feed_dict={x: x_batch, y: y_batch})

        # periodically evaluate on the dev set and checkpoint the best model so far
        if i % 100 == 0:
            curr_dev_acc = sess.run(
                acc, feed_dict={x: word_list_to_embedding(data['x_dev'], embeddings, embedding_dimension),
                                y: data['y_dev']})
            if best_acc < curr_dev_acc:
                best_acc = curr_dev_acc
                saver.save(sess, './data/best_tweet_model.ckpt')

    print('Epoch %d | Minibatch loss %.3f | Minibatch accuracy %.3f | Dev accuracy %.3f' %
          (epoch, l, batch_acc, curr_dev_acc))
In [8]:
# restore variables from disk
saver.restore(sess, "./data/best_tweet_model.ckpt")
print("Best model restored!")
print('DevTest accuracy:', sess.run(
    acc, feed_dict={x: word_list_to_embedding(data['x_devtest'], embeddings, embedding_dimension),
                    y: data['y_devtest']}))
In [9]:
# maximum softmax probability and KL divergence from the uniform distribution,
# computed over all examples, over correctly classified examples, and over misclassified examples
s = tf.nn.softmax(logits)
s_prob = tf.reduce_max(s, reduction_indices=[1], keep_dims=True)
kl_all = tf.log(25.) + tf.reduce_sum(s * tf.log(tf.abs(s) + 1e-11), reduction_indices=[1], keep_dims=True)
m_all, v_all = tf.nn.moments(kl_all, axes=[0])

logits_right = tf.boolean_mask(logits, tf.equal(tf.argmax(logits, 1), y))
s_right = tf.nn.softmax(logits_right)
s_right_prob = tf.reduce_max(s_right, reduction_indices=[1], keep_dims=True)
kl_right = tf.log(25.) + tf.reduce_sum(s_right * tf.log(tf.abs(s_right) + 1e-11), reduction_indices=[1], keep_dims=True)
m_right, v_right = tf.nn.moments(kl_right, axes=[0])

logits_wrong = tf.boolean_mask(logits, tf.not_equal(tf.argmax(logits, 1), y))
s_wrong = tf.nn.softmax(logits_wrong)
s_wrong_prob = tf.reduce_max(s_wrong, reduction_indices=[1], keep_dims=True)
kl_wrong = tf.log(25.) + tf.reduce_sum(s_wrong * tf.log(tf.abs(s_wrong) + 1e-11), reduction_indices=[1], keep_dims=True)
m_wrong, v_wrong = tf.nn.moments(kl_wrong, axes=[0])
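For reference, KL[p||u] against the uniform distribution over K classes reduces to log(K) minus the entropy of p, which is exactly the quantity the kl_* tensors above compute (log(25.) plus the sum of p*log p). The cell below is a small numpy illustration, not part of the original notebook; the probability vector is made up for the example.
In [ ]:
# Hedged illustration: KL[p||u] = sum_i p_i * log(p_i / (1/K)) = log(K) - H(p).
p = np.array([0.7, 0.1, 0.1, 0.05, 0.05])  # hypothetical 5-class distribution
K = p.size
kl_direct = np.sum(p * np.log(p * K))
kl_via_entropy = np.log(K) + np.sum(p * np.log(p))
print(kl_direct, kl_via_entropy)  # the two values agree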
In [10]:
err, kl_a, kl_r, kl_w, s_p, s_rp, s_wp = sess.run(
    [100 - acc, kl_all, kl_right, kl_wrong, s_prob, s_right_prob, s_wrong_prob],
    feed_dict={x: word_list_to_embedding(data['x_dev'], embeddings, embedding_dimension),
               y: data['y_dev']})

print('Twitter Error (%) | Prediction Prob (mean, std) | PProb Right (mean, std) | PProb Wrong (mean, std):')
print(err, '|', np.mean(s_p), np.std(s_p), '|', np.mean(s_rp), np.std(s_rp), '|', np.mean(s_wp), np.std(s_wp))
print('\nSuccess Detection')
print('Success base rate (%):', round(100-err,2))
print('KL[p||u]: Right/Wrong classification distinction')
safe, risky = kl_r, kl_w
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[:safe.shape[0]] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
print('Prediction Prob: Right/Wrong classification distinction')
safe, risky = s_rp, s_wp
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[:safe.shape[0]] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
print('\nError Detection')
print('Error base rate (%):', round(err,2))
safe, risky = -kl_r, -kl_w
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[safe.shape[0]:] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('KL[p||u]: Right/Wrong classification distinction')
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
print('Prediction Prob: Right/Wrong classification distinction')
safe, risky = -s_rp, -s_wp
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[safe.shape[0]:] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
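The detection scores above follow scikit-learn's convention that label 1 is the positive class and that higher scores should indicate that class, which is why the error-detection block negates the confidence scores and flips which half of `labels` is set to 1. The cell below is a toy illustration of that convention with made-up values, not part of the original notebook.
In [ ]:
# Hedged toy example of the sklearn scoring convention (hypothetical labels and scores).
toy_labels = np.array([1, 1, 1, 0, 0])
toy_scores = np.array([0.9, 0.8, 0.4, 0.3, 0.2])
print(sk.roc_auc_score(toy_labels, toy_scores))            # 1.0 here: every positive outranks every negative
print(sk.average_precision_score(toy_labels, toy_scores))  # AUPR also depends on the positive base rate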