The code sets up the model, evaluates the effectiveness of softmax information alone, and afterward shows the improvement gained from an abnormality module.
In [1]:
import tensorflow as tf
import numpy as np
import h5py as h5
import sklearn.metrics as sk
In [2]:
# training parameters
learning_rate = 0.001  # Adam step size
training_epochs = 20  # NOTE(review): not referenced in the cells shown here
batch_size = 32  # sentences per batch; each sentence expands into many frames
# architecture parameters
n_hidden = 1024  # width of each fully connected layer
n_labels = 39 # 39 phones
n_coeffs = 26  # acoustic coefficients per frame
n_context_frames = 11 # 5 + 1 + 5
p = 0.75 # keep rate
In [3]:
def enumerate_context(i, sentence, num_frames):
    """Return the window of frames centered on index i of sentence.

    The window spans num_frames positions on each side; indices that fall
    outside the sentence are clamped to the first/last frame (edge padding).
    """
    last = len(sentence) - 1
    window = [min(max(idx, 0), last)
              for idx in range(i - num_frames, i + num_frames + 1)]
    return sentence[window]
def add_context(sentence, num_frames=11):
    """Expand each frame into a flattened window of itself plus its neighbors.

    Maps [sentence_length, coefficients] -> [sentence_length, num_frames*n_coeffs].
    """
    assert num_frames % 2 == 1, "Number of frames must be odd (since left + 1 + right, left = right)"
    if num_frames == 1:
        return sentence
    half_width = (num_frames - 1) // 2
    windows = [list(enumerate_context(idx, sentence, half_width))
               for idx in range(len(sentence))]
    return np.array(windows).reshape((-1, num_frames*n_coeffs))
In [4]:
# Build the computation graph: a 3-hidden-layer phone classifier over context
# windows, an autoencoder branch off the second hidden layer, and a scalar
# "risk" head fed by the logits, the squared reconstruction residual, and the
# hidden features.
graph = tf.Graph()
with graph.as_default():
    # placeholders: flattened context window, phone labels, in-/out-of-sample
    # targets for the risk head, and a switch controlling dropout
    x = tf.placeholder(dtype=tf.float32, shape=[None, n_coeffs*n_context_frames])
    y = tf.placeholder(dtype=tf.int64, shape=[None])
    risk_labels = tf.placeholder(dtype=tf.float32, shape=[None])
    is_training = tf.placeholder(tf.bool)
    # nonlinearity
    def gelu_fast(_x):
        # tanh approximation of the GELU activation
        return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
    f = gelu_fast
    W = {}
    b = {}
    with tf.variable_scope("in_sample"):
        # classifier weights; columns are l2-normalized then rescaled
        # (the 0.425 and p factors presumably compensate for GELU/dropout
        # output variance -- TODO confirm the derivation)
        W['1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_context_frames*n_coeffs, n_hidden]), 0)/tf.sqrt(1 + p*0.425))
        W['2'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_hidden]), 0)/tf.sqrt(0.425/p + p*0.425))
        W['3'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_hidden]), 0)/tf.sqrt(0.425/p + p*0.425))
        W['logits'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_labels]), 0)/tf.sqrt(0.425/p + 1))
        b['1'] = tf.Variable(tf.zeros([n_hidden]))
        b['2'] = tf.Variable(tf.zeros([n_hidden]))
        b['3'] = tf.Variable(tf.zeros([n_hidden]))
        b['logits'] = tf.Variable(tf.zeros([n_labels]))
        # autoencoder branch: bottleneck taken from h2, decoded back to input size
        W['bottleneck'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_hidden//2]), 0)/tf.sqrt(0.425/p + 0.425))
        W['decode1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden//2, n_hidden]), 0)/tf.sqrt(0.425 + p*0.425))
        W['decode2'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_hidden]), 0)/tf.sqrt(0.425/p + 0.425*p))
        W['reconstruction'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_context_frames*n_coeffs]), 0)/tf.sqrt(0.425/p + 1))
        b['bottleneck'] = tf.Variable(tf.zeros([n_hidden//2]))
        b['decode1'] = tf.Variable(tf.zeros([n_hidden]))
        b['decode2'] = tf.Variable(tf.zeros([n_hidden]))
        b['reconstruction'] = tf.Variable(tf.zeros([n_context_frames*n_coeffs]))
    with tf.variable_scope("out_of_sample"):
        # risk head: mixes the reconstruction residual, hidden features, and logits
        W['residual_to_risk1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_context_frames*n_coeffs, n_hidden//2]), 0)/tf.sqrt(1 + 0.425))
        W['hidden_to_risk1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden, n_hidden//2]), 0)/tf.sqrt(0.425/p + 0.425))
        W['logits_to_risk1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_labels, n_hidden//2]), 0)/tf.sqrt(1 + 0.425))
        W['risk2'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_hidden//2, 128]), 0)/tf.sqrt(0.425 + 0.425))
        W['risk'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([128, 1]), 0)/tf.sqrt(0.425 + 1))
        b['risk1'] = tf.Variable(tf.zeros([n_hidden//2]))
        b['risk2'] = tf.Variable(tf.zeros([128]))
        b['risk'] = tf.Variable(tf.zeros([1]))
    def feedforward(x):
        # classifier trunk; dropout only applies when is_training is fed True
        h1 = f(tf.matmul(x, W['1']) + b['1'])
        h1 = tf.cond(is_training, lambda: tf.nn.dropout(h1, p), lambda: h1)
        h2 = f(tf.matmul(h1, W['2']) + b['2'])
        h2 = tf.cond(is_training, lambda: tf.nn.dropout(h2, p), lambda: h2)
        h3 = f(tf.matmul(h2, W['3']) + b['3'])
        h3 = tf.cond(is_training, lambda: tf.nn.dropout(h3, p), lambda: h3)
        out = tf.matmul(h3, W['logits']) + b['logits']
        # autoencoder: reconstruct the input window from a bottleneck of h2
        hidden_to_bottleneck = f(tf.matmul(h2, W['bottleneck']) + b['bottleneck'])
        d1 = f(tf.matmul(hidden_to_bottleneck, W['decode1']) + b['decode1'])
        d1 = tf.cond(is_training, lambda: tf.nn.dropout(d1, p), lambda: d1)
        d2 = f(tf.matmul(d1, W['decode2']) + b['decode2'])
        d2 = tf.cond(is_training, lambda: tf.nn.dropout(d2, p), lambda: d2)
        recreation = tf.matmul(d2, W['reconstruction']) + b['reconstruction']
        # risk head: logits + squared reconstruction residual + hidden features
        risk1 = f(tf.matmul(out, W['logits_to_risk1']) +
                  tf.matmul(tf.square(x - recreation), W['residual_to_risk1']) +
                  tf.matmul(h2, W['hidden_to_risk1']) + b['risk1'])
        risk2 = f(tf.matmul(risk1, W['risk2']) + b['risk2'])
        risk_out = tf.matmul(risk2, W['risk'])
        return out, recreation, tf.squeeze(risk_out)
    logits, reconstruction, risk = feedforward(x)
    # phase-1 loss: classification + 0.1 * reconstruction + small l2 penalty
    # on the in-sample weights (risk-head weights are trained separately)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y)) +\
        0.1 * tf.reduce_mean(tf.square(x - reconstruction)) +\
        1e-4*(tf.nn.l2_loss(W['1']) + tf.nn.l2_loss(W['2']) + tf.nn.l2_loss(W['3']) +
              tf.nn.l2_loss(W['bottleneck']) + tf.nn.l2_loss(W['decode1']) + tf.nn.l2_loss(W['decode2']))
    lr = tf.constant(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
    # fraction of frames whose argmax prediction differs from the label
    compute_error = tf.reduce_mean(tf.to_float(tf.not_equal(tf.argmax(logits, 1), y)))
In [6]:
# Load TIMIT training data, split off a validation set, and normalize with
# training-set statistics.
print('Loading Data')
data = h5.File("train.h5")
X_train = data['X'][()]
Y_train = data['y'][()]
train_idxs = data['start_idx'][()]
# get validation set (last 500 sentences)
X_val = X_train[-500:]
Y_val = Y_train[-500:]
val_idxs = train_idxs[-500:]
X_train = X_train[:-500]
Y_train = Y_train[:-500]
train_idxs = train_idxs[:-500]
# normalize with training-set statistics
# NOTE(review): X_val is not normalized here -- the in-place ops below only
# touch the X_train slice; confirm whether that is intended.
train_mean = np.mean(X_train, axis=(0,1))
train_std = np.std(X_train, axis=(0,1))
X_train -= train_mean
X_train /= (train_std + 1e-11)
# NOTE: the test set is not the core test set but the entire one, so it's easier
data = h5.File("test.h5")
# BUG FIX: train_mean was previously subtracted twice from X_test
# (once at load time and again below); subtract it exactly once.
X_test = data['X'][()]
Y_test = data['y'][()]
test_idxs = data['start_idx'][()]
X_test -= train_mean
X_test /= (train_std + 1e-11)
del data
print('Number of training examples', X_train.shape[0])
print('Number of validation examples', X_val.shape[0])
print('Number of testing examples', X_test.shape[0])
In [7]:
# interactive session bound to the graph built above; used by all later cells
sess = tf.InteractiveSession(graph=graph)
In [8]:
# Phase-2 setup: initialize everything except the risk head, then build the
# risk loss/optimizer and initialize the variables phase 2 introduces
# (risk-head weights plus the new Adam slots).
in_sample_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "in_sample")
out_of_sample_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "out_of_sample")
sess.run(tf.initialize_variables(set(tf.all_variables()) - set(out_of_sample_vars)))
# sigmoid cross-entropy on the scalar risk logit vs 1 (in-sample) / 0 (OOS)
risk_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(risk, risk_labels))
phase2_vars = list(set(tf.all_variables()) - set(in_sample_vars))
risk_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(risk_loss, var_list=phase2_vars)
sess.run(tf.initialize_variables(set(tf.all_variables()) - set(in_sample_vars)))
# fraction of examples whose 0.5-thresholded risk prediction disagrees with the label
compute_risk_error = tf.reduce_mean(tf.to_float(tf.not_equal(tf.to_int64(tf.round(tf.sigmoid(risk))),
                                                             tf.to_int64(tf.round(risk_labels)))))
# could collapse this into an "initialize all" statement but that might have less fecundity
In [16]:
# NOTE(review): this cell (In [16]) closes the session and appears to have been
# executed out of order -- the cells below assume the session is still open.
sess.close()
In [9]:
# restore the pretrained phase-1 (classifier + autoencoder) weights
saver = tf.train.Saver(max_to_keep=1)
saver.restore(sess, "./fcn.ckpt")
print('Model restored')
In [10]:
# Softmax-derived confidence scores.
# kl_* is KL[p||u] against the uniform distribution over the 39 phones:
# log(39) + sum_i p_i log p_i; larger means a more peaked (confident) softmax.
s = tf.nn.softmax(logits)
s_prob = tf.reduce_max(s, reduction_indices=[1], keep_dims=True)
kl_all = tf.log(39.) + tf.reduce_sum(s * tf.log(tf.abs(s) + 1e-11), reduction_indices=[1], keep_dims=True)
m_all, v_all = tf.nn.moments(kl_all, axes=[0])
# same scores restricted to correctly classified frames
logits_right = tf.boolean_mask(logits, tf.equal(tf.argmax(logits, 1), y))
s_right = tf.nn.softmax(logits_right)
s_right_prob = tf.reduce_max(s_right, reduction_indices=[1], keep_dims=True)
kl_right = tf.log(39.) + tf.reduce_sum(s_right * tf.log(tf.abs(s_right) + 1e-11), reduction_indices=[1], keep_dims=True)
m_right, v_right = tf.nn.moments(kl_right, axes=[0])
# ... and restricted to misclassified frames
logits_wrong = tf.boolean_mask(logits, tf.not_equal(tf.argmax(logits, 1), y))
s_wrong = tf.nn.softmax(logits_wrong)
s_wrong_prob = tf.reduce_max(s_wrong, reduction_indices=[1], keep_dims=True)
kl_wrong = tf.log(39.) + tf.reduce_sum(s_wrong * tf.log(tf.abs(s_wrong) + 1e-11), reduction_indices=[1], keep_dims=True)
m_wrong, v_wrong = tf.nn.moments(kl_wrong, axes=[0])
In [11]:
# Collect softmax-based scores over the clean test set, batch by batch.
# Hoisted out of the loop: the original built the 100*compute_error op inside
# sess.run on every iteration, adding a new node to the graph each time.
err_op = 100*compute_error
kl_a, kl_r, kl_w, s_p, s_rp, s_wp = [], [], [], [], [], []
err_total = 0
for i in range(X_test.shape[0]//batch_size):
    offset = i * batch_size
    _bx, mask_x, _by = X_test[offset:offset+batch_size], test_idxs[offset:offset+batch_size], Y_test[offset:offset+batch_size]
    bx, by = [], []
    # inner index renamed to j: the original reused i, shadowing the batch index
    for j in range(_bx.shape[0]):
        # each sentence starts at its start index and is expanded into context windows
        sentence_frames = add_context(_bx[j][mask_x[j]:])
        bx.append(sentence_frames)
        by.append(_by[j][mask_x[j]:])
    bx, by = np.concatenate(bx), np.concatenate(by)
    err, kl_a_curr, kl_r_curr, kl_w_curr, s_p_curr, s_rp_curr, s_wp_curr = sess.run(
        [err_op, kl_all, kl_right, kl_wrong, s_prob, s_right_prob, s_wrong_prob],
        feed_dict={x: bx, y: by, is_training: False})
    kl_a.append(kl_a_curr)
    kl_r.append(kl_r_curr)
    kl_w.append(kl_w_curr)
    s_p.append(s_p_curr)
    s_rp.append(s_rp_curr)
    s_wp.append(s_wp_curr)
    err_total += err
# mean of per-batch error percentages (batches contain variable frame counts)
err_total /= X_test.shape[0]//batch_size
kl_a = np.concatenate(kl_a)
kl_r = np.concatenate(kl_r)
kl_w = np.concatenate(kl_w)
s_p = np.concatenate(s_p)
s_rp = np.concatenate(s_rp)
s_wp = np.concatenate(s_wp)
In [13]:
# Report the softmax baseline on the clean test set: detecting classification
# successes and errors from KL[p||u] and the max prediction probability.
# NOTE(review): per the remark below this cell, the base rates are printed
# incorrectly -- treat the AUPR/AUROC values as the usable numbers.
print('Frame Error (%)| Prediction Prob (mean, std) | PProb Right (mean, std) | PProb Wrong (mean, std):')
print(err_total, '|', np.mean(s_p), np.std(s_p), '|', np.mean(s_rp), np.std(s_rp), '|', np.mean(s_wp), np.std(s_wp))
print('\nSuccess Detection')
print('Success base rate (%):', round(100-err_total,2))
print('KL[p||u]: Right/Wrong classification distinction')
safe, risky = kl_r, kl_w
# label 1 = correctly classified frame (the positive class here)
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[:safe.shape[0]] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
print('Prediction Prob: Right/Wrong classification distinction')
safe, risky = s_rp, s_wp
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[:safe.shape[0]] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
print('\nError Detection')
print('Error base rate (%):', round(err_total,2))
# scores negated so the positive class (errors) gets the higher scores
safe, risky = -kl_r, -kl_w
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[safe.shape[0]:] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('KL[p||u]: Right/Wrong classification distinction')
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
print('Prediction Prob: Right/Wrong classification distinction')
safe, risky = -s_rp, -s_wp
labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
labels[safe.shape[0]:] += 1
examples = np.squeeze(np.vstack((safe, risky)))
print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
The base rates are incorrectly printed.
In [24]:
# Softmax-baseline normality/abnormality detection against each noisy TIMIT
# variant. Fixes vs the original: inner loop index renamed (it shadowed the
# batch index i), three print labels under "Abnormality Detection" said
# "Normality", and the relative-to-correct normality base rate used the
# negative-class formula.
for oos_name in ['airport', 'babble', 'car', 'exhibition', 'restaurant', 'subway', 'street', 'train']:
    data = h5.File("test_" + oos_name + ".h5") # real noise at a volume of 30%
    oos_x = data['X'][()]
    oos_y = data['y'][()]
    oos_idxs = data['start_idx'][()]
    oos_x -= train_mean
    oos_x /= (train_std + 1e-11)
    kl_oos = []
    s_p_oos = []
    for i in range(oos_x.shape[0]//batch_size):
        offset = i * batch_size
        _bx, mask_x, _by = oos_x[offset:offset+batch_size], oos_idxs[offset:offset+batch_size], oos_y[offset:offset+batch_size]
        bx, by = [], []
        for j in range(_bx.shape[0]):
            sentence_frames = add_context(_bx[j][mask_x[j]:])
            bx.append(sentence_frames)
            by.append(_by[j][mask_x[j]:])
        bx, by = np.concatenate(bx), np.concatenate(by)
        kl_oos_curr, s_p_oos_curr = sess.run([kl_all, s_prob], feed_dict={x: bx, is_training: False})
        kl_oos.append(kl_oos_curr)
        s_p_oos.append(s_p_oos_curr)
    print('\n\n' + oos_name, 'Example Prediction Probability (mean, std):')
    print(np.mean(np.concatenate(s_p_oos)), np.std(np.concatenate(s_p_oos)))
    print('\nNormality Detection')
    print('Normality base rate (%):', round(50,2))
    print('KL[p||u]: Normality Detection')
    safe, risky = kl_a, np.concatenate(kl_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('Prediction Prob: Normality Detection')
    safe, risky = s_p, np.concatenate(s_p_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    # BUG FIX: the positives here are the correctly classified clean frames,
    # so the base rate is (1 - e)/(2 - e), not 1/(2 - e)
    print('Normality base rate (%):', round(100*(1 - err_total/100)/(1 + 1 - err_total/100),2))
    print('KL[p||u]: Normality Detection (relative to correct examples)')
    safe, risky = kl_r, np.concatenate(kl_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('Prediction Prob: Normality Detection (relative to correct examples)')
    safe, risky = s_rp, np.concatenate(s_p_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('\nAbnormality Detection')
    print('Abnormality base rate (%):', round(50,2))
    print('KL[p||u]: Abnormality Detection')
    safe, risky = -kl_a, -np.concatenate(kl_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[safe.shape[0]:] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('Prediction Prob: Abnormality Detection')
    safe, risky = -s_p, -np.concatenate(s_p_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[safe.shape[0]:] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('Abnormality base rate (%):', round(100*1./(1 + 1 - err_total/100),2))
    print('KL[p||u]: Abnormality Detection (relative to correct examples)')
    safe, risky = -kl_r, -np.concatenate(kl_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[safe.shape[0]:] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('Prediction Prob: Abnormality Detection (relative to correct examples)')
    safe, risky = -s_rp, -np.concatenate(s_p_oos)
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[safe.shape[0]:] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
In [28]:
# Load three synthetic corruptions of the training set (filenames suggest
# different noise types/levels -- presumably p=pepper/perturbation,
# w=white noise, b=blur; confirm against the data-generation script) and
# normalize them with the clean training statistics.
print('Loading OOD Data')
data = h5.File("train_p_0.02.h5")
p_02 = data['X'][()]
p_02_idxs = data['start_idx'][()]
p_02 -= train_mean
p_02 /= (train_std + 1e-11)
data = h5.File("train_w_0.005.h5")
w_005 = data['X'][()]
w_005_idxs = data['start_idx'][()]
w_005 -= train_mean
w_005 /= (train_std + 1e-11)
data = h5.File("train_b_0.05.h5")
b_05 = data['X'][()]
b_05_idxs = data['start_idx'][()]
b_05 -= train_mean
b_05 /= (train_std + 1e-11)
del data
In [29]:
# Train the risk head: in-sample batches (train + validation sentences) get
# target 1, corrupted batches get target 0. Fixes vs the original: the five
# copy-pasted batch-building loops reused i (shadowing the batch index) and are
# now one helper; the epoch-count comment disagreed with the code.
print('Training the risk neuron')

def _context_batch(frames, starts):
    # expand each sentence (from its start index) into context-window rows
    return np.concatenate([add_context(frames[j][starts[j]:])
                           for j in range(frames.shape[0])])

num_batches = X_train.shape[0] // batch_size
err_ema = 1./n_labels
risk_loss_ema = 0.3 # -log(0.5)
for epoch in range(2): # 2 epochs
    # shuffle data
    # NOTE(review): this assumes the corrupted copies (p_02, w_005, b_05) have
    # exactly as many sentences as X_train -- confirm upstream
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    Y_train = Y_train[indices]
    train_idxs = train_idxs[indices]
    p_02 = p_02[indices]
    p_02_idxs = p_02_idxs[indices]
    w_005 = w_005[indices]
    w_005_idxs = w_005_idxs[indices]
    b_05 = b_05[indices]
    b_05_idxs = b_05_idxs[indices]
    for i in range(num_batches):
        offset = i * batch_size
        # get in-sample data: training sentences ...
        bx1 = _context_batch(X_train[offset:offset+batch_size//4],
                             train_idxs[offset:offset+batch_size//4])
        # ... plus randomly drawn held-out validation sentences
        val_indices = np.arange(X_val.shape[0])
        np.random.shuffle(val_indices)
        bx2 = _context_batch(X_val[val_indices[0:batch_size//4]],
                             val_idxs[val_indices[0:batch_size//4]])
        # get oos data: the three corrupted copies of the training set
        bx3 = _context_batch(p_02[offset:offset+batch_size//6],
                             p_02_idxs[offset:offset+batch_size//6])
        bx4 = _context_batch(w_005[offset:offset+batch_size//6],
                             w_005_idxs[offset:offset+batch_size//6])
        bx5 = _context_batch(b_05[offset:offset+batch_size//6],
                             b_05_idxs[offset:offset+batch_size//6])
        # risk target: 1 for in-sample frames, 0 for corrupted frames
        risks = np.zeros(bx1.shape[0] + bx2.shape[0] + bx3.shape[0] + bx4.shape[0] + bx5.shape[0])
        risks[:bx1.shape[0] + bx2.shape[0]] = 1
        bx = np.concatenate((bx1, bx2, bx3, bx4, bx5), axis=0)
        # NOTE(review): is_training is False, so dropout is off while training
        # the risk head -- presumably intentional; confirm
        _, rl, err = sess.run([risk_optimizer, risk_loss, compute_risk_error],
                              feed_dict={x: bx, risk_labels: risks, is_training: False})
        risk_loss_ema = risk_loss_ema * 0.95 + 0.05 * rl
        err_ema = err_ema * 0.95 + 0.05 * err
    print('Epoch:', epoch, '|', 'ema of risk for epoch:', risk_loss_ema, 'error (%):', 100*err_ema)
In [30]:
# Evaluate classifier error, risk-neuron error, and confidence on the clean
# test set. Fixes vs the original: tf.sigmoid / tf.nn.softmax / 100*... ops
# were built inside sess.run every iteration (growing the graph each pass) and
# are now created once; the inner loop index no longer shadows the batch index.
err_op = 100*compute_error
risk_err_op = 100*compute_risk_error
risk_sigmoid_op = tf.sigmoid(risk)
softmax_op = tf.nn.softmax(logits)
err_total = 0
risk_err_total = 0
risk_total = []
risk_right_total = []
risk_wrong_total = []
conf_total = []
for i in range(X_test.shape[0]//batch_size):
    offset = i * batch_size
    _bx, mask_x, _by = X_test[offset:offset+batch_size], test_idxs[offset:offset+batch_size], Y_test[offset:offset+batch_size]
    bx, by = [], []
    for j in range(_bx.shape[0]):
        sentence_frames = add_context(_bx[j][mask_x[j]:])
        bx.append(sentence_frames)
        by.append(_by[j][mask_x[j]:])
    bx, by = np.concatenate(bx), np.concatenate(by)
    err, r_err, r, conf = sess.run([err_op, risk_err_op, risk_sigmoid_op, softmax_op],
                                   feed_dict={x: bx, y: by, risk_labels: np.ones(by.shape[0]), is_training: False})
    # split the risk scores by whether the classifier got the frame right
    r_right = r[np.argmax(conf, axis=1).astype(np.int32) == by]
    r_wrong = r[np.argmax(conf, axis=1).astype(np.int32) != by]
    err_total += err
    risk_err_total += r_err
    risk_total.append(r)
    conf_total.append(conf)
    risk_right_total.append(r_right)
    risk_wrong_total.append(r_wrong)
risk_err_total /= X_test.shape[0]//batch_size
err_total /= X_test.shape[0]//batch_size
In [31]:
# Summarize clean-test classifier error, risk-neuron error, and softmax confidence.
print('TIMIT Clean Frame Error (%) | TIMIT Frame Riskiness Error (0.5 cutoff) (%) | Frame Confidence (mean, std):')
print(err_total, '|', risk_err_total, '|', np.mean(np.max(np.concatenate(conf_total), axis=1)),
      np.std(np.max(np.concatenate(conf_total), axis=1)))
In [32]:
# Score the risk neuron on separating correctly- from incorrectly-classified
# clean test frames (positives = correct frames).
safe = np.concatenate(risk_right_total).reshape(-1,1)
risky = np.concatenate(risk_wrong_total).reshape(-1,1)
labels = np.concatenate([np.ones(safe.shape[0], dtype=np.int32),
                         np.zeros(risky.shape[0], dtype=np.int32)])
examples = np.squeeze(np.vstack((safe, risky)))
print('Risk Neuron: Clean Right/Wrong classification distinction')
print('AUPR', sk.average_precision_score(labels, examples))
print('AUROC', sk.roc_auc_score(labels, examples))
The implication is that the logistic regression neuron is not as good at detecting whether an example is misclassified. Perhaps if incorrect examples were treated as negative examples we would do better.
Now let's try OOD examples.
Update: The base rates should be the same as those printed above. err_total is overwritten inside the loop below when the base-rate computation should use the earlier clean-test value, so the base rates are printed incorrectly, but this doesn't affect anything else.
In [33]:
# Risk-neuron normality/abnormality detection against each noisy TIMIT variant.
# Fixes vs the original: graph ops are created once instead of inside sess.run
# per batch; the inner loop index no longer shadows the batch index; the
# idempotent risk_total / risk_right_total conversions moved out of the loop;
# and the "relative to correct examples" base rates use the clean-test error
# (err_total was being overwritten per noisy set -- the bug acknowledged in
# the note above this cell) with the positive-class formula.
err_op = 100*compute_error
risk_err_op = 100*compute_risk_error
risk_sigmoid_op = tf.sigmoid(risk)
softmax_op = tf.nn.softmax(logits)
clean_err_total = err_total  # clean-test error from the previous evaluation
risk_total = np.concatenate(risk_total).reshape(-1,1)
risk_right_total = np.concatenate(risk_right_total).reshape(-1,1)
for oos_name in ['airport', 'babble', 'car', 'exhibition', 'restaurant', 'subway', 'street', 'train']:
    data = h5.File("test_" + oos_name + ".h5") # real noise at a volume of 30%
    oos_x = data['X'][()]
    oos_y = data['y'][()]
    oos_idxs = data['start_idx'][()]
    oos_x -= train_mean
    oos_x /= (train_std + 1e-11)
    err_total = 0
    risk_err_total = 0
    risk_total_oos = []
    risk_right_total_oos = []
    risk_wrong_total_oos = []
    conf_total_oos = []
    for i in range(oos_x.shape[0]//batch_size):
        offset = i * batch_size
        _bx, mask_x, _by = oos_x[offset:offset+batch_size], oos_idxs[offset:offset+batch_size], oos_y[offset:offset+batch_size]
        bx, by = [], []
        for j in range(_bx.shape[0]):
            sentence_frames = add_context(_bx[j][mask_x[j]:])
            bx.append(sentence_frames)
            by.append(_by[j][mask_x[j]:])
        bx, by = np.concatenate(bx), np.concatenate(by)
        err, r_err, r, conf = sess.run([err_op, risk_err_op, risk_sigmoid_op, softmax_op],
                                       feed_dict={x: bx, y: by, risk_labels: np.zeros(by.shape[0]), is_training: False})
        r_right = r[np.argmax(conf, axis=1).astype(np.int32) == by]
        r_wrong = r[np.argmax(conf, axis=1).astype(np.int32) != by]
        err_total += err
        risk_err_total += r_err
        risk_total_oos.append(r)
        conf_total_oos.append(conf)
        risk_right_total_oos.append(r_right)
        risk_wrong_total_oos.append(r_wrong)
    risk_err_total /= oos_x.shape[0]//batch_size
    err_total /= oos_x.shape[0]//batch_size
    print('\nTIMIT', oos_name, 'Frame Error (%) | TIMIT Frame Riskiness Error (0.5 cutoff) (%) | Frame Confidence (mean, std):')
    print(err_total, '|', risk_err_total, '|', np.mean(np.max(np.concatenate(conf_total_oos), axis=1)),
          np.std(np.max(np.concatenate(conf_total_oos), axis=1)))
    risk_total_oos = np.concatenate(risk_total_oos).reshape(-1,1)
    print('\nNormality Detection')
    print('Normality base rate (%):', round(50,2))
    print('Normality Detection')
    safe, risky = risk_total, risk_total_oos
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    # positives are the correctly classified clean frames: base rate (1-e)/(2-e)
    print('Normality base rate (%):', round(100*(1 - clean_err_total/100)/(1 + 1 - clean_err_total/100),2))
    print('Normality Detection (relative to correct examples)')
    safe, risky = risk_right_total, risk_total_oos
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[:safe.shape[0]] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('\n\nAbnormality Detection')
    print('Abnormality base rate (%):', round(50,2))
    print('Abnormality Detection')
    safe, risky = 1 - risk_total, 1 - risk_total_oos
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[safe.shape[0]:] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))
    print('Abnormality base rate (%):', round(100*1./(1 + 1 - clean_err_total/100),2))
    print('Abnormality Detection (relative to correct examples)')
    safe, risky = 1 - risk_right_total, 1 - risk_total_oos
    labels = np.zeros((safe.shape[0] + risky.shape[0]), dtype=np.int32)
    labels[safe.shape[0]:] += 1
    examples = np.squeeze(np.vstack((safe, risky)))
    print('AUPR (%):', round(100*sk.average_precision_score(labels, examples), 2))
    print('AUROC (%):', round(100*sk.roc_auc_score(labels, examples), 2))