In [1]:
import tensorflow as tf
import numpy as np
import time
import os
import re
import collections
import random
import pickle

In [2]:
maxlen = 20
location = os.getcwd()
learning_rate = 0.0001
batch = 100

In [3]:
with open('dataset-emotion.p', 'rb') as fopen:
    df = pickle.load(fopen)
with open('vector-emotion.p', 'rb') as fopen:
    vectors = pickle.load(fopen)
with open('dataset-dictionary.p', 'rb') as fopen:
    dictionary = pickle.load(fopen)

In [4]:
label = np.unique(df[:,1])
from sklearn.cross_validation import train_test_split
train_X, test_X, train_Y, test_Y = train_test_split(df[:,0], df[:, 1].astype('int'), test_size = 0.2)


/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [7]:
def add_conv1d(x, n_filters, kernel_size, strides=1):
    return tf.layers.conv1d(inputs = x,
                            filters = n_filters,
                            kernel_size  = kernel_size,
                            strides = strides,
                            padding = 'valid',
                            use_bias = True,
                            activation = tf.nn.relu)

class Model:
    def __init__(self, seq_len, dimension_input, dimension_output, learning_rate,
                 top_k=5, n_filters=250):
        self.n_filters = n_filters
        self.kernels = [3, 4, 5]
        self.top_k = top_k
        self.X = tf.placeholder(tf.float32, [None, seq_len, dimension_input])
        self.Y = tf.placeholder(tf.float32, [None, dimension_output])
        parallels = []
        for k in self.kernels:
            p = add_conv1d(self.X, self.n_filters//len(self.kernels), kernel_size=k)
            p = self.add_kmax_pooling(p)
            parallels.append(p)
        parallels = tf.concat(parallels, axis=-1)
        parallels = tf.reshape(parallels, [-1, self.top_k * (len(self.kernels)*(self.n_filters//len(self.kernels)))])
        feed = tf.nn.dropout(tf.layers.dense(parallels, self.n_filters, tf.nn.relu), 0.5)
        self.logits = tf.layers.dense(parallels, dimension_output)
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.Y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
        self.correct_pred = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
        
    def add_kmax_pooling(self, x):
        Y = tf.transpose(x, [0, 2, 1])
        Y = tf.nn.top_k(Y, self.top_k, sorted=False).values
        Y = tf.transpose(Y, [0, 2, 1])
        return tf.reshape(Y, [-1, self.top_k, self.n_filters//len(self.kernels)])

In [8]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(maxlen, vectors.shape[1], label.shape[0], learning_rate)
sess.run(tf.global_variables_initializer())
dimension = vectors.shape[1]
saver = tf.train.Saver(tf.global_variables())
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 10, 0, 0, 0
while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:', EPOCH)
        break
    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0
    for i in range(0, (train_X.shape[0] // batch) * batch, batch):
        batch_x = np.zeros((batch, maxlen, dimension))
        batch_y = np.zeros((batch, len(label)))
        for k in range(batch):
            tokens = train_X[i + k].split()[:maxlen]
            emb_data = np.zeros((maxlen, dimension), dtype = np.float32)
            for no, text in enumerate(tokens[::-1]):
                try:
                    emb_data[-1 - no, :] += vectors[dictionary[text], :]
                except Exception as e:
                    print(e)
                    continue
            batch_y[k, int(train_Y[i + k])] = 1.0
            batch_x[k, :, :] = emb_data[:, :]
        loss, _ = sess.run([model.cost, model.optimizer], feed_dict = {model.X : batch_x, model.Y : batch_y})
        train_loss += loss
        train_acc += sess.run(model.accuracy, feed_dict = {model.X : batch_x, model.Y : batch_y})
    
    for i in range(0, (test_X.shape[0] // batch) * batch, batch):
        batch_x = np.zeros((batch, maxlen, dimension))
        batch_y = np.zeros((batch, len(label)))
        for k in range(batch):
            tokens = test_X[i + k].split()[:maxlen]
            emb_data = np.zeros((maxlen, dimension), dtype = np.float32)
            for no, text in enumerate(tokens[::-1]):
                try:
                    emb_data[-1 - no, :] += vectors[dictionary[text], :]
                except:
                    continue
            batch_y[k, int(test_Y[i + k])] = 1.0
            batch_x[k, :, :] = emb_data[:, :]
        loss, acc = sess.run([model.cost, model.accuracy], feed_dict = {model.X : batch_x, model.Y : batch_y})
        test_loss += loss
        test_acc += acc
        
    train_loss /= (train_X.shape[0] // batch)
    train_acc /= (train_X.shape[0] // batch)
    test_loss /= (test_X.shape[0] // batch)
    test_acc /= (test_X.shape[0] // batch)
    if test_acc > CURRENT_ACC:
        print('epoch:', EPOCH, ', pass acc:', CURRENT_ACC, ', current acc:', test_acc)
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
        saver.save(sess, os.getcwd() + "/model-rnn-vector.ckpt")
    else:
        CURRENT_CHECKPOINT += 1
    EPOCH += 1
    print('time taken:', time.time()-lasttime)
    print('epoch:', EPOCH, ', training loss:', train_loss, ', training acc:', train_acc, ', valid loss:', test_loss, ', valid acc:', test_acc)


epoch: 0 , pass acc: 0 , current acc: 0.7892557016226137
time taken: 804.0231313705444
epoch: 1 , training loss: 1.0405448560916384 , training acc: 0.6323185358431144 , valid loss: 0.656854874625498 , valid acc: 0.7892557016226137
epoch: 1 , pass acc: 0.7892557016226137 , current acc: 0.8698919552857993
time taken: 1351.138239622116
epoch: 2 , training loss: 0.49731766012395007 , training acc: 0.8376664650890737 , valid loss: 0.3876018189475173 , valid acc: 0.8698919552857993
epoch: 2 , pass acc: 0.8698919552857993 , current acc: 0.8877310929607515
time taken: 797.2636046409607
epoch: 3 , training loss: 0.32868029959343403 , training acc: 0.8840131976680263 , valid loss: 0.29582717079146 , valid acc: 0.8877310929607515
epoch: 3 , pass acc: 0.8877310929607515 , current acc: 0.8923889563149479
time taken: 796.8685095310211
epoch: 4 , training loss: 0.2662454009073969 , training acc: 0.897600480930206 , valid loss: 0.2612088197240738 , valid acc: 0.8923889563149479
epoch: 4 , pass acc: 0.8923889563149479 , current acc: 0.8929891963394321
time taken: 796.6610190868378
epoch: 5 , training loss: 0.2372481707320395 , training acc: 0.903623276366112 , valid loss: 0.24514047085952645 , valid acc: 0.8929891963394321
time taken: 796.6961421966553
epoch: 6 , training loss: 0.2200671689206065 , training acc: 0.9078434326009401 , valid loss: 0.23638642628510603 , valid acc: 0.8929531825404493
time taken: 796.8826093673706
epoch: 7 , training loss: 0.208123647843068 , training acc: 0.9109748064863803 , valid loss: 0.23125186870221187 , valid acc: 0.8921848746622596
time taken: 796.6506168842316
epoch: 8 , training loss: 0.1989910382337211 , training acc: 0.913800241654693 , valid loss: 0.22816874272003324 , valid acc: 0.8910564236423405
time taken: 796.6401255130768
epoch: 9 , training loss: 0.19156466832302899 , training acc: 0.9160467918825922 , valid loss: 0.2263157782094533 , valid acc: 0.8905642268752136
time taken: 797.0528984069824
epoch: 10 , training loss: 0.18526453343144633 , training acc: 0.9180743866468806 , valid loss: 0.22532645118336717 , valid acc: 0.8898079237159418
time taken: 796.5553665161133
epoch: 11 , training loss: 0.17975202326042142 , training acc: 0.9200539908047272 , valid loss: 0.2249716002435959 , valid acc: 0.8891716691053787
time taken: 796.8624227046967
epoch: 12 , training loss: 0.17481807256972245 , training acc: 0.9217726472758503 , valid loss: 0.22506386032696962 , valid acc: 0.8884753898269131
time taken: 796.3765687942505
epoch: 13 , training loss: 0.1703198642571064 , training acc: 0.9236472719754011 , valid loss: 0.22551165249238925 , valid acc: 0.8876230493456233
time taken: 796.4298026561737
epoch: 14 , training loss: 0.1661635103474031 , training acc: 0.9251289759557549 , valid loss: 0.22629864114959414 , valid acc: 0.8869867948066144
time taken: 796.5306029319763
epoch: 15 , training loss: 0.16230329809499203 , training acc: 0.92655968966424 , valid loss: 0.2273115130747352 , valid acc: 0.8860024001274933
break epoch: 15

In [ ]: