In [1]:
import numpy as np
import tensorflow as tf
# %matplotlib inline
# import matplotlib.pylab as plt
from helper_functions_twitter import *

%load_ext autoreload
%autoreload 2

In [10]:
window_size = 1

# encode the 25 POS tags (the ARK TweetNLP tagset) as integers for later convenience
tag_to_number = {
    u'N': 0, u'O': 1, u'S': 2, u'^': 3, u'Z': 4, u'L': 5, u'M': 6,
    u'V': 7, u'A': 8, u'R': 9, u'!': 10, u'D': 11, u'P': 12, u'&': 13, u'T': 14,
    u'X': 15, u'Y': 16, u'#': 17, u'@': 18, u'~': 19, u'U': 20, u'E': 21, u'$': 22,
    u',': 23, u'G': 24
}

embeddings = embeddings_to_dict('./data/Tweets/embeddings-twitter.txt')
vocab = embeddings.keys()

# <s> has no embedding, so we use </s> as the start symbol; its vector is a closer stand-in than UNK's
xt, yt = data_to_mat('./data/Tweets/tweets-train.txt', vocab, tag_to_number, window_size=window_size,
                     start_symbol=u'</s>', one_hot=True)
xdev, ydev = data_to_mat('./data/Tweets/tweets-dev.txt', vocab, tag_to_number, window_size=window_size,
                         start_symbol=u'</s>', one_hot=True)
xdtest, ydtest = data_to_mat('./data/Tweets/tweets-devtest.txt', vocab, tag_to_number, window_size=window_size,
                             start_symbol=u'</s>', one_hot=True)

data = {
    'x_train': xt, 'y_train': yt,
    'x_dev': xdev, 'y_dev': ydev,
    'x_devtest': xdtest, 'y_devtest': ydtest
}
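
A quick, hypothetical sanity-check cell (the exact shapes depend on what data_to_mat in helper_functions_twitter returns; assumed here: 2-D word-window matrices with matching one-hot tag matrices):

In [ ]:
# hypothetical cell: inspect the loaded splits
for split in ('train', 'dev', 'devtest'):
    print(split, data['x_' + split].shape, data['y_' + split].shape)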

In [80]:
def train_and_test(mode="c_is_softmax_prob", seed=100, learning_rate=0.001):
    """Train the tagger and evaluate on devtest.

    modes: c_is_softmax_prob, c_is_trained_softmax_prob,
           c_is_cotrained_sigmoid, c_is_auxiliary_sigmoid
    """
    training_epochs = 20
    n_labels = 25
    batch_size = 64
    embedding_dimension = 50
    example_size = (2 * window_size + 1) * embedding_dimension
    num_examples = data['y_train'].shape[0]
    num_batches = num_examples // batch_size
    
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(seed)  # the graph-level seed must be set at graph construction; in practice it does not make these runs reproducible

        x = tf.placeholder(dtype=tf.float32, shape=[None, example_size])
        y = tf.placeholder(dtype=tf.float32, shape=[None, n_labels])

        def gelu(x):
            # Gaussian Error Linear Unit, tanh approximation
            return 0.5 * x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
        f = gelu

        W = {}
        b = {}

        with tf.variable_scope("classifier"):
            W['1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([example_size, 256]), 0))
            W['2'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([256, 256]), 0))
            W['3'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([256, 256]), 0))
            W['logits'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([256, n_labels]), 0))

            b['1'] = tf.Variable(tf.zeros([256]))
            b['2'] = tf.Variable(tf.zeros([256]))
            b['3'] = tf.Variable(tf.zeros([256]))
            b['logits'] = tf.Variable(tf.zeros([n_labels]))

        with tf.variable_scope("confidence_scorer"):
            W['hidden_to_conf1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([256, 512]), 0))
            W['logits_to_conf1'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([n_labels, 512]), 0))
            W['conf2'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([512, 128]), 0))
            W['conf'] = tf.Variable(tf.nn.l2_normalize(tf.random_normal([128, 1]), 0))

            b['conf1'] = tf.Variable(tf.zeros([512]))
            b['conf2'] = tf.Variable(tf.zeros([128]))
            b['conf'] = tf.Variable(tf.zeros([1]))

        def cautious_fcn(x):
            h1 = f(tf.matmul(x, W['1']) + b['1'])
            h2 = f(tf.matmul(h1, W['2']) + b['2'])
            h3 = f(tf.matmul(h2, W['3']) + b['3'])
            logits_out = tf.matmul(h3, W['logits']) + b['logits']

            conf1 = f(tf.matmul(logits_out, W['logits_to_conf1']) +
                      tf.matmul(h2, W['hidden_to_conf1']) + b['conf1'])
            conf2 = f(tf.matmul(conf1, W['conf2']) + b['conf2'])
            conf_out = tf.matmul(conf2, W['conf']) + b['conf']

            return logits_out, tf.squeeze(conf_out)

        logits, confidence_logit = cautious_fcn(x)

        # 1 where the argmax prediction is correct, 0 otherwise; gradients are stopped
        # so this target does not backpropagate into the classifier
        right_answer = tf.stop_gradient(tf.to_float(tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))))
        compute_error = 100 * tf.reduce_mean(1 - right_answer)

        classification_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y))
        if "softmax" in mode:
            confidence_logit = tf.reduce_max(tf.nn.softmax(logits), reduction_indices=[1])
            caution_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(confidence_logit, right_answer))
            
            # cc_loss is cautious classification loss
            if mode == "c_is_trained_softmax_prob":
                cc_loss = classification_loss + caution_loss
            else:
                cc_loss = classification_loss
        
        elif mode == "c_is_cotrained_sigmoid":
            caution_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(confidence_logit, right_answer))
            cc_loss = classification_loss + caution_loss
            confidence = tf.sigmoid(confidence_logit)
        elif mode == "c_is_auxiliary_sigmoid":
            caution_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(confidence_logit, right_answer))
            cc_loss = classification_loss  # we use caution_loss after training normal classifier
        else:
            assert False, "Invalid mode specified"
        
        cc_calibration_score = tf.reduce_mean((2 * right_answer - 1) * (2 * tf.sigmoid(confidence_logit) - 1))
        cc_model_score = tf.reduce_mean(right_answer * ((2 * right_answer - 1) * (2 * tf.sigmoid(confidence_logit) - 1) + 1) / 2)

        # cautious classification perplexities (exponentiated cross-entropies)
        cc_calibration_perplexity = tf.exp(caution_loss)
        cc_model_perplexity = tf.exp(caution_loss + classification_loss)
        
        lr = tf.constant(learning_rate)  # feedable constant, so the rate can be overridden via feed_dict
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cc_loss)

    sess = tf.InteractiveSession(graph=graph)
    
    if "softmax" in mode:
        sess.run(tf.initialize_all_variables())
    
    elif mode == "c_is_cotrained_sigmoid":
        sess.run(tf.initialize_all_variables())
    
    elif mode == "c_is_auxiliary_sigmoid":
        thawed_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "classifier")
        frozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "confidence_scorer")
        sess.run(tf.initialize_variables(set(tf.all_variables()) - set(frozen_vars)))

    # exponential moving averages of the training metrics, initialized pessimistically
    err_ema = 90
    cc_calibration_perp_ema = 10
    cc_model_perp_ema = 10
    cc_calibration_score_ema = -1
    cc_model_score_ema = -1

    for epoch in range(1, training_epochs + 1):
        # shuffle the training data each epoch
        indices = np.arange(num_examples)
        np.random.shuffle(indices)
        data['x_train'] = data['x_train'][indices]
        data['y_train'] = data['y_train'][indices]

        for i in range(num_batches):
            offset = i * batch_size

            bx = word_list_to_embedding(data['x_train'][offset:offset + batch_size, :],
                                        embeddings, embedding_dimension)
            by = data['y_train'][offset:offset + batch_size]

            if mode != "c_is_auxiliary_sigmoid":
                _, err, cc_model_score_curr, cc_calibration_score_curr,\
                cc_model_perp_curr, cc_calibration_perp_curr = sess.run([
                        optimizer, compute_error, cc_model_score, cc_calibration_score,
                        cc_model_perplexity, cc_calibration_perplexity],
                     feed_dict={x: bx, y: by, lr: learning_rate})

                err_ema = err_ema * 0.95 + 0.05 * err
                cc_calibration_perp_ema = cc_calibration_perp_ema * 0.95 + 0.05 * cc_calibration_perp_curr
                cc_model_perp_ema = cc_model_perp_ema * 0.95 + 0.05 * cc_model_perp_curr
                cc_calibration_score_ema = cc_calibration_score_ema * 0.95 + 0.05 * cc_calibration_score_curr
                cc_model_score_ema = cc_model_score_ema * 0.95 + 0.05 * cc_model_score_curr
            else:
                _, err = sess.run([optimizer, compute_error],
                                  feed_dict={x: bx, y: by, lr: learning_rate})
                err_ema = err_ema * 0.95 + 0.05 * err

        if epoch % 10 == 0:
            print('Epoch', epoch, ' | ', 'Current Classification Error (%)', err_ema)
            if mode != "c_is_auxiliary_sigmoid":
                print('Epoch', epoch, ' | ', 'Cautious Classification Calibration Perp', cc_calibration_perp_ema)
                print('Epoch', epoch, ' | ', 'Cautious Classification Model Perp', cc_model_perp_ema)
                print('Epoch', epoch, ' | ', 'Cautious Classification Calibration Score', cc_calibration_score_ema)
                print('Epoch', epoch, ' | ', 'Cautious Classification Model Score', cc_model_score_ema)

    if mode == "c_is_auxiliary_sigmoid":
        # train sigmoid separately from the classifier
        phase2_vars = list(set(tf.all_variables()) - set(thawed_vars))
        optimizer2 = tf.train.AdamOptimizer(learning_rate=0.001).minimize(caution_loss, var_list=phase2_vars)
        sess.run(tf.initialize_variables(set(tf.all_variables()) - set(thawed_vars)))

        for epoch in range(3):
            for i in range(num_batches):
                offset = i * batch_size

                bx = word_list_to_embedding(data['x_train'][offset:offset + batch_size, :],
                                            embeddings, embedding_dimension)
                by = data['y_train'][offset:offset + batch_size]

                sess.run([optimizer2], feed_dict={x: bx, y: by})

    err, cc_model_score_test, cc_calibration_score_test,\
    cc_model_perp_test, cc_calibration_perp_test = sess.run(
        [compute_error, cc_model_score, cc_calibration_score,
         cc_model_perplexity, cc_calibration_perplexity],
        feed_dict={x: word_list_to_embedding(data['x_devtest'], embeddings, embedding_dimension),
                   y: data['y_devtest']})

    print('Test Classification Error (%)', err)
    print('Test Cautious Classification Calibration Perp', cc_calibration_perp_test)
    print('Test Cautious Classification Model Perp', cc_model_perp_test)
    print('Test Cautious Classification Calibration Score', cc_calibration_score_test)
    print('Test Cautious Classification Model Score', cc_model_score_test)

    sess.close()
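
The tanh expression in gelu above is the standard fast approximation to the exact GELU, x * Phi(x) with Phi the standard normal CDF. A quick NumPy comparison (a sketch; scipy is assumed to be available):

In [ ]:
# sketch: gap between the tanh GELU approximation used above and the exact form
import numpy as np
from scipy.stats import norm

xs = np.linspace(-5, 5, 1001)
gelu_exact = xs * norm.cdf(xs)
gelu_tanh = 0.5 * xs * (1 + np.tanh(np.sqrt(2 / np.pi) * (xs + 0.044715 * xs**3)))
print(np.abs(gelu_exact - gelu_tanh).max())  # the maximum gap is small (well under 1e-2)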

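The calibration and model scores defined above have a direct reading: with correctness r = right_answer in {0, 1} and confidence c = sigmoid(confidence_logit) in [0, 1], the calibration score averages (2r - 1)(2c - 1), rewarding confidence on correct predictions and doubt on incorrect ones, while the model score additionally zeroes out incorrect predictions. A toy illustration with hypothetical numbers:

In [ ]:
# hypothetical toy values illustrating the two scores computed in train_and_test
import numpy as np
r = np.array([1., 1., 0., 1.])    # right_answer: 1 where the argmax matched the label
c = np.array([.9, .8, .2, .6])    # sigmoid(confidence_logit)
calibration_score = np.mean((2 * r - 1) * (2 * c - 1))
model_score = np.mean(r * ((2 * r - 1) * (2 * c - 1) + 1) / 2)
print(calibration_score, model_score)  # both approach 1 as confident-and-correct dominates
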
In [85]:
train_and_test()
train_and_test()
train_and_test()


Epoch 10  |  Current Classification Error (%) 7.64026743057
Epoch 10  |  Cautious Classification Calibration Perp 1.46547546575
Epoch 10  |  Cautious Classification Model Perp 1.85142755641
Epoch 10  |  Cautious Classification Calibration Score 0.386193511063
Epoch 10  |  Cautious Classification Model Score 0.665618254806
Epoch 20  |  Current Classification Error (%) 1.56097880302
Epoch 20  |  Cautious Classification Calibration Perp 1.39106289344
Epoch 20  |  Cautious Classification Model Perp 1.47106011707
Epoch 20  |  Cautious Classification Calibration Score 0.443357149912
Epoch 20  |  Cautious Classification Model Score 0.716329939717
Test Classification Error (%) 13.7304
Test Cautious Classification Calibration Perp 1.5507
Test Cautious Classification Model Perp 3.51644
Test Cautious Classification Calibration Score 0.337601
Test Cautious Classification Model Score 0.626207
Epoch 10  |  Current Classification Error (%) 6.52178477683
Epoch 10  |  Cautious Classification Calibration Perp 1.45401980373
Epoch 10  |  Cautious Classification Model Perp 1.81153794018
Epoch 10  |  Cautious Classification Calibration Score 0.394639810404
Epoch 10  |  Cautious Classification Model Score 0.673948091463
Epoch 20  |  Current Classification Error (%) 2.4737086154
Epoch 20  |  Cautious Classification Calibration Perp 1.40150844649
Epoch 20  |  Cautious Classification Model Perp 1.51113015682
Epoch 20  |  Cautious Classification Calibration Score 0.435446983213
Epoch 20  |  Cautious Classification Model Score 0.709165153082
Test Classification Error (%) 13.9681
Test Cautious Classification Calibration Perp 1.55188
Test Cautious Classification Model Perp 3.64268
Test Cautious Classification Calibration Score 0.337287
Test Cautious Classification Model Score 0.625224
Epoch 10  |  Current Classification Error (%) 7.5293218826
Epoch 10  |  Cautious Classification Calibration Perp 1.46495451536
Epoch 10  |  Cautious Classification Model Perp 1.8431294227
Epoch 10  |  Cautious Classification Calibration Score 0.386694858861
Epoch 10  |  Cautious Classification Model Score 0.666371045987
Epoch 20  |  Current Classification Error (%) 2.00522231909
Epoch 20  |  Cautious Classification Calibration Perp 1.39658729767
Epoch 20  |  Cautious Classification Model Perp 1.49277278706
Epoch 20  |  Cautious Classification Calibration Score 0.43909283851
Epoch 20  |  Cautious Classification Model Score 0.712649607651
Test Classification Error (%) 14.2198
Test Cautious Classification Calibration Perp 1.55474
Test Cautious Classification Model Perp 3.61556
Test Cautious Classification Calibration Score 0.33516
Test Cautious Classification Model Score 0.623195

In [86]:
train_and_test("c_is_cotrained_sigmoid")
train_and_test("c_is_cotrained_sigmoid")
train_and_test("c_is_cotrained_sigmoid")


Epoch 10  |  Current Classification Error (%) 7.48396218212
Epoch 10  |  Cautious Classification Calibration Perp 1.1575132031
Epoch 10  |  Cautious Classification Model Perp 1.46652571859
Epoch 10  |  Cautious Classification Calibration Score 0.820281630018
Epoch 10  |  Cautious Classification Model Score 0.88133875657
Epoch 20  |  Current Classification Error (%) 2.64868468146
Epoch 20  |  Cautious Classification Calibration Perp 1.07687996985
Epoch 20  |  Cautious Classification Model Perp 1.17345266493
Epoch 20  |  Cautious Classification Calibration Score 0.920370430996
Epoch 20  |  Cautious Classification Model Score 0.951495754674
Test Classification Error (%) 13.9402
Test Cautious Classification Calibration Perp 1.57654
Test Cautious Classification Model Perp 3.31664
Test Cautious Classification Calibration Score 0.720826
Test Cautious Classification Model Score 0.84076
Epoch 10  |  Current Classification Error (%) 7.2899537807
Epoch 10  |  Cautious Classification Calibration Perp 1.16116282811
Epoch 10  |  Cautious Classification Model Perp 1.46671502571
Epoch 10  |  Cautious Classification Calibration Score 0.823509813979
Epoch 10  |  Cautious Classification Model Score 0.883757523806
Epoch 20  |  Current Classification Error (%) 1.66209556079
Epoch 20  |  Cautious Classification Calibration Perp 1.04735118208
Epoch 20  |  Cautious Classification Model Perp 1.11922261748
Epoch 20  |  Cautious Classification Calibration Score 0.94650470986
Epoch 20  |  Cautious Classification Model Score 0.968646617144
Test Classification Error (%) 13.0872
Test Cautious Classification Calibration Perp 1.9009
Test Cautious Classification Model Perp 3.9842
Test Cautious Classification Calibration Score 0.744274
Test Cautious Classification Model Score 0.860583
Epoch 10  |  Current Classification Error (%) 7.69865044825
Epoch 10  |  Cautious Classification Calibration Perp 1.17741953998
Epoch 10  |  Cautious Classification Model Perp 1.50496898407
Epoch 10  |  Cautious Classification Calibration Score 0.801836216427
Epoch 10  |  Cautious Classification Model Score 0.872736561717
Epoch 20  |  Current Classification Error (%) 2.00094767709
Epoch 20  |  Cautious Classification Calibration Perp 1.06933431412
Epoch 20  |  Cautious Classification Model Perp 1.14531232709
Epoch 20  |  Cautious Classification Calibration Score 0.932037780647
Epoch 20  |  Cautious Classification Model Score 0.961318182405
Test Classification Error (%) 13.5766
Test Cautious Classification Calibration Perp 1.6671
Test Cautious Classification Model Perp 3.48629
Test Cautious Classification Calibration Score 0.731238
Test Cautious Classification Model Score 0.851706

In [83]:
train_and_test("c_is_auxiliary_sigmoid")


Epoch 10  |  Current Classification Error (%) 6.30479048276
Epoch 20  |  Current Classification Error (%) 2.46373553995
Test Classification Error (%) 13.8003
Test Cautious Classification Calibration Perp 3.28663
Test Cautious Classification Model Perp 7.52278
Test Cautious Classification Calibration Score 0.725052
Test Cautious Classification Model Score 0.860351
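
Though not exercised in this notebook, the natural downstream use of the learned confidence is selective prediction: abstain whenever the confidence falls below a threshold, trading coverage for accuracy on the examples that are kept. A minimal sketch with hypothetical values:

In [ ]:
# hypothetical sketch: abstention via a confidence threshold
import numpy as np
confidences = np.array([.95, .40, .80, .15, .70])  # sigmoid(confidence_logit) per example
correct = np.array([1., 0., 1., 0., 1.])           # whether the argmax was right

for t in (0.0, 0.5, 0.75):
    kept = confidences >= t
    accuracy = correct[kept].mean() if kept.any() else float('nan')
    print('threshold %.2f | coverage %.2f | accuracy on kept %.2f' % (t, kept.mean(), accuracy))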