In [1]:
from __future__ import print_function
import collections
import numpy as np
import pandas as pd
import pickle
import copy
import time

import sklearn
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.regularizers import l1
from keras import backend as K
from keras.models import model_from_json

import tensorflow as tf

from sklearn.model_selection import train_test_split

# fix random seed for reproducibility
np.random.seed(7)

# create session explicitly and keep a reference
# so we can access and evaluate tensors directly 
sess = tf.Session()
K.set_session(sess)


Using TensorFlow backend.

In [2]:
# Global config variables 
model_name = "streeteye_lstm"
#data_file = "lstm_dump_test.txt"
data_file = "dump_2017_words.txt"

checkpoint_dir = "/home/ubuntu/mount/Notebooks/checkpoints"
tensorboard_dir ="/home/ubuntu/mount/Notebooks/tensorboard"

In [3]:
############################################################
# 1. load data
############################################################

# load dataset
print("Loading data...")
data=[]
y=[]

# count words
c = collections.Counter()

with open(data_file, "r") as infile:
    for line in infile:
        l = line.rstrip('\n').split(",")
        label = l.pop(0)
        # skip empty headlines
        if len(l[0]) == 0:
            continue
        if '' in l:
            l = [w for w in l if w]
        data.append(l)
        y.append(label)
        c.update(l)
        
print("Loaded data.")


Loading data...
Loaded data.

In [4]:
# create a list of top words        
vocabulary_size = 10000 # set this to have ~20 for least popular
count = [['UNK', -1]]
count.extend(c.most_common(vocabulary_size - 1))
print(count[:10])
print(count[-10:])


[['UNK', -1], ('domain_otherdomain', 119708), ('subsource_othersubsource', 47862), ('trump', 21141), ('with', 10761), ('domain_youtube.com', 8908), ('us', 8434), ('2017', 7862), ('from', 7768), ('subsource_memeorandum', 7712)]
[('hazard', 17), ('alexei', 17), ('molly', 17), ('expel', 17), ('champ', 17), ('admiral', 17), ('conversational', 17), ('memorable', 17), ('wharton', 17), ('torn', 17)]

In [5]:
dictionary = dict()
# map words into a dict of ints
for word, _ in count:
    dictionary[word] = len(dictionary)

data_embeddings=[]
unk_count = 0

for obs in data:
    embedlist = []
    for word in obs:
        if word in dictionary:
            index = dictionary[word]
        else:
            index = 0  # dictionary['UNK']
            unk_count = unk_count + 1
        embedlist.append(index)
    data_embeddings.append(embedlist)
        
count[0][1] = unk_count
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))

In [6]:
print(dictionary['trump'])
print(reverse_dictionary[3])


3
trump

In [7]:
%matplotlib inline
ls = (map(len, data_embeddings))
pd.DataFrame(ls).hist()


Out[7]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fd4e4911c90>]], dtype=object)

In [8]:
MAX_LENGTH = 120
X = sequence.pad_sequences(data_embeddings, maxlen=MAX_LENGTH)
X[0]
X.shape


Out[8]:
(218419, 120)

In [9]:
y=np.array(np.float32(y))

y=y.reshape((y.shape[0],1))
print(y.shape)
num_labels=1

num_obs, num_features = X.shape
print("Observations: %d\nFeatures: %d" % (num_obs, num_features))

# split into training, xval, test, 60/20/20
print("Split into training, temp")
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
print("Split into xval, test")
X_xval, X_test, y_xval, y_test = train_test_split(X_temp, y_temp, test_size=0.5)


(218419, 1)
Observations: 218419
Features: 120
Split into training, temp
Split into xval, test

In [10]:
print("Training set")
print(X_train.shape)

print("Xval set")
print(X_xval.shape)

print("Test set")
print(X_test.shape)

num_training_samples = X_train.shape[0]
num_xval_samples = X_xval.shape[0]
num_test_samples = X_test.shape[0]

print ("\nTraining observations:  %d  \nXval observations:  %d  \nTest observations:  %d\n" % (num_training_samples, num_xval_samples, num_test_samples))


Training set
(131051, 120)
Xval set
(43684, 120)
Test set
(43684, 120)

Training observations:  131051  
Xval observations:  43684  
Test observations:  43684


In [11]:
# initialize embeddings to pre-trained vals
pkl_file = open('embeddings.pkl', 'rb')
embeddings_dict, embeddings_reverse_dict, embeddings_data = pickle.load(pkl_file)

In [12]:
EMBEDDING_DIM=300

# +1 seems like an off-by-one somewhere
embedding_matrix = np.zeros((len(dictionary) + 1, EMBEDDING_DIM))

count = 0
for word, i in dictionary.items():
    #print(word)
    embed_i = embeddings_dict.get(word)
    if embed_i is not None:
        embedding_vector = embeddings_data[i]
        count +=1
        embedding_matrix[i] = embedding_vector
        
print("initialized %d embeddings" % count)


initialized 10000 embeddings

In [ ]:
# define some custom metrics
import keras.backend as K

def recall(y_true, y_pred):
    # return keras tensor for recall
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision(y_true, y_pred):
    # return keras tensor for precision
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def fbeta_score(y_true, y_pred, beta=1):
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score

def f_score(y_true, y_pred):
    beta = 1 #  can adjust to penalize false positives/negatives
    return fbeta_score(y_true, y_pred, beta=beta)

In [18]:
# function to generate model

def create_model(lstm_size=30, lstm_reg_penalty=0.0, lstm_dropout=0.0, sigmoid_dropout=(1.0/3.0), sigmoid_reg_penalty=0.0001, train_embed=True):
    # create model
    model = Sequential()

    model.add(Embedding(len(dictionary) + 1, 
                        embedding_vector_length, 
                        weights=[embedding_matrix],
                        input_length=MAX_LENGTH,
                        trainable=train_embed))
    
    # LSTM with lstm_size units
    model.add(LSTM(lstm_size,
                   kernel_regularizer=l1(lstm_reg_penalty)))
    model.add(Dropout(sigmoid_dropout))
    
    model.add(Dense(1, 
                    activation='sigmoid',
                    kernel_initializer='TruncatedNormal', 
                    kernel_regularizer=l1(sigmoid_reg_penalty)))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f_score])
    print(model.summary())
    return model

In [14]:
def selectThreshold (logits, labels, beta=(2.0/3)):
    # return threshold, f-score that yields best F-score
    # predict using true if >= threshold

    precision, recall, thresholds = sklearn.metrics.precision_recall_curve(labels, logits)
    bb = beta**2
    f1_scores = (1 + bb) * precision * recall / (bb * precision + recall)
    f1_scores = np.nan_to_num(f1_scores)
    
    best_index = np.argmax(f1_scores)
    best_threshold = thresholds[best_index]
    best_score = f1_scores[best_index]
    return (best_threshold, best_score)

In [15]:
# grid search - first train end-to-end, then freeze embeddings
embedding_vector_length = EMBEDDING_DIM

for sig_reg_penalty in [0.00003]:
    for dropout in [0.333]:
        for lstm_units in [16, 32, 64, 128]:
            for lstm_reg_penalty in [0.00000,]:
                #0.000001, 0.000003, 0.00001, 0.00003]:
                models = []
                xval_losses = []

                model = create_model(lstm_size=lstm_units, 
                                     lstm_reg_penalty=lstm_reg_penalty, 
                                     sigmoid_dropout=dropout, 
                                     sigmoid_reg_penalty=sig_reg_penalty)
                print('%s Starting (unfrozen)...' % time.strftime("%H:%M:%S"))
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)

                ##################################################################
                # train end-to-end including embeddings until xval loss bottoms out
                ##################################################################
                
                epochs = 10
                for _ in range(epochs):
                    fit = model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=1, batch_size=1024)
                    # save loss
                    train_loss = fit.history['loss'][-1]
                    train_acc = fit.history['acc'][-1]
                    xval_loss = fit.history['val_loss'][-1]
                    xval_acc = fit.history['val_acc'][-1]
                    xval_losses.append(xval_loss)
                    models.append(copy.copy(model))

                    bestloss_index = np.argmin(xval_losses)
                    bestloss_value = xval_losses[bestloss_index]

                    # break if loss rises by 10% from best
                    if xval_loss / bestloss_value > 1.1:
                        break
                    
                # evaluate and save model from epoch with best xval loss
                print ("%s Best Xval loss epoch %d, value %f" % (time.strftime("%H:%M:%S"), bestloss_index, bestloss_value))
                model = models[bestloss_index]

                print('%s Finished (unfrozen)...' % time.strftime("%H:%M:%S"))
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)                
                
                y_train_prob = model.predict(X_train)
                
                beta=(2.0/3.0) # penalize false positives more than false negatives
                thresh, score = selectThreshold(y_train_prob, y_train, beta=beta)
                y_train_pred = y_train_prob >= thresh

                
                print("%s Train Accuracy %.3f, Train F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (time.strftime("%H:%M:%S"),
                       sklearn.metrics.accuracy_score(y_train_pred, y_train), 
                       sklearn.metrics.f1_score(y_train_pred, y_train),
                       score, beta))
                
                print(sklearn.metrics.confusion_matrix(y_train_pred, y_train))

                y_xval_prob = model.predict(X_xval)
                
                thresh, score = selectThreshold(y_xval_prob, y_xval, beta=beta)
                y_xval_pred = y_xval_prob >= thresh
                
                print("%s Xval Accuracy %.3f, Xval F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (time.strftime("%H:%M:%S"),
                       sklearn.metrics.accuracy_score(y_xval_pred, y_xval), 
                       sklearn.metrics.f1_score(y_xval_pred, y_xval),
                       score, beta))
                
                confusion_matrix = sklearn.metrics.confusion_matrix(y_xval_pred, y_xval)
                print(confusion_matrix)
                false_positive = confusion_matrix[1][0]
                false_negative = confusion_matrix[0][1]
                true_positive = confusion_matrix[1][1]
                raw_score = 1.0 * (true_positive - false_positive) / np.sum(confusion_matrix)
                print ("Raw score 2 %f" % raw_score)
                
                # save model to disk
                print('%s Saving...' % time.strftime("%H:%M:%S"))               
                modelname = "unfrozen_%d_%.6f_%.3f_%.6f" % (lstm_units, lstm_reg_penalty, dropout, sig_reg_penalty)
                model.save("%s.h5" % modelname)
                model.save_weights("%s_weights.h5" % modelname)
                with open("%s.json" % modelname, "wb") as fjson:
                    fjson.write(model.to_json()) 

                # back off from best... best is usually already overfitted with .9 train f1
                if bestloss_index > 2:
                    resume_index = bestloss_index - 2
                else:
                    resume_index = 0
                print ("%s Further training from epoch %d, value %f" % (time.strftime("%H:%M:%S"), resume_index, bestloss_value))
                model = models[resume_index]

                ##################################################################
                # freeze embeddings, train LSTM until xval loss bottoms out
                ##################################################################
                elayer = model.layers[0]
                
                elayer.trainable = False
                model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
                print(model.summary())

                # Could also keep existing list of models in case further training makes it worse                
                models = []
                xval_losses = []
                epochs = 20

                print('%s Continuing (frozen)...' % time.strftime("%H:%M:%S"))

                for _ in range(epochs):
                    fit = model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=1, batch_size=1024)
                    # save losses
                    train_loss = fit.history['loss'][-1]
                    train_acc = fit.history['acc'][-1]
                    xval_loss = fit.history['val_loss'][-1]
                    xval_acc = fit.history['val_acc'][-1]
                    xval_losses.append(xval_loss)
                    models.append(copy.copy(model))

                    bestloss_index = np.argmin(xval_losses)
                    bestloss_value = xval_losses[bestloss_index]
    
                    # break if loss rises by 10% from best
                    if xval_loss / bestloss_value > 1.1:
                        break
                    
                # evaluate and save model from epoch with best xval loss
                print ("%s Best Xval loss epoch %d, value %f" % (time.strftime("%H:%M:%S"), bestloss_index, bestloss_value))
                model = models[bestloss_index]

                print('%s Finished (frozen)...' % time.strftime("%H:%M:%S"))
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)                
               
                y_train_prob = model.predict(X_train)
                
                beta=(2.0/3.0) # penalize false positives more than false negatives
                thresh, score = selectThreshold(y_train_prob, y_train, beta=beta)
                y_train_pred = y_train_prob >= thresh
                
                print("%s Train Accuracy %.3f, Train F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (time.strftime("%H:%M:%S"),
                       sklearn.metrics.accuracy_score(y_train_pred, y_train),
                       sklearn.metrics.f1_score(y_train_pred, y_train),
                       score, beta))
                
                print(sklearn.metrics.confusion_matrix(y_train_pred, y_train))

                y_xval_prob = model.predict(X_xval)
                
                thresh, score = selectThreshold(y_xval_prob, y_xval, beta=beta)
                y_xval_pred = y_xval_prob >= thresh

                print("%s Xval Accuracy %.3f, Xval F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (time.strftime("%H:%M:%S"),
                       sklearn.metrics.accuracy_score(y_xval_pred, y_xval), 
                       sklearn.metrics.f1_score(y_xval_pred, y_xval),
                       score, beta))
                
                confusion_matrix = sklearn.metrics.confusion_matrix(y_xval_pred, y_xval)
                print(confusion_matrix)
                false_positive = confusion_matrix[1][0]
                false_negative = confusion_matrix[0][1]
                true_positive = confusion_matrix[1][1]

                raw_score = 1.0 * (true_positive - false_positive) / np.sum(confusion_matrix)
                print ("Raw score 2 %f" % raw_score)
                
                # save model to disk
                print('%s Saving...' % time.strftime("%H:%M:%S"))               
                modelname = "model_%d_%.6f_%.3f_%.6f" % (lstm_units, lstm_reg_penalty, dropout, sig_reg_penalty)
                model.save("%s.h5" % modelname)
                model.save_weights("%s_weights.h5" % modelname)
                with open("%s.json" % modelname, "wb") as fjson:
                    fjson.write(model.to_json())


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_1 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_1 (LSTM)                (None, 16)                20288     
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
=================================================================
Total params: 3,020,605.0
Trainable params: 3,020,605.0
Non-trainable params: 0.0
_________________________________________________________________
None
13:49:47 Starting (unfrozen)...
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 313s - loss: 0.3298 - acc: 0.9659 - val_loss: 0.1507 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 312s - loss: 0.1398 - acc: 0.9726 - val_loss: 0.1333 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 202s - loss: 0.1309 - acc: 0.9726 - val_loss: 0.1317 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 163s - loss: 0.1206 - acc: 0.9726 - val_loss: 0.0806 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 163s - loss: 0.0647 - acc: 0.9827 - val_loss: 0.0554 - val_acc: 0.9842
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 163s - loss: 0.0470 - acc: 0.9878 - val_loss: 0.0516 - val_acc: 0.9842
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 163s - loss: 0.0360 - acc: 0.9912 - val_loss: 0.0555 - val_acc: 0.9837
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 163s - loss: 0.0282 - acc: 0.9937 - val_loss: 0.0594 - val_acc: 0.9826
14:17:15 Best Xval loss epoch 5, value 0.051646
14:17:15 Using epoch 4, value 0.051646
14:17:15 Finished (unfrozen)...
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
14:18:34 Train Accuracy 0.996, Train F1 0.929, f_score 0.939 (beta 0.667)
[[127310    349]
 [   150   3242]]
14:19:00 Xval Accuracy 0.983, Xval F1 0.676, f_score 0.708 (beta 0.667)
[[42175   503]
 [  236   770]]
Raw score 2 0.012224
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_1 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_1 (LSTM)                (None, 16)                20288     
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
=================================================================
Total params: 3,020,605.0
Trainable params: 20,305.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
14:19:00 Continuing (frozen)...
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 130s - loss: 0.0202 - acc: 0.9959 - val_loss: 0.0725 - val_acc: 0.9826
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0188 - acc: 0.9960 - val_loss: 0.0762 - val_acc: 0.9828
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0186 - acc: 0.9962 - val_loss: 0.0749 - val_acc: 0.9823
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0180 - acc: 0.9962 - val_loss: 0.0780 - val_acc: 0.9823
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0181 - acc: 0.9964 - val_loss: 0.0788 - val_acc: 0.9825
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0175 - acc: 0.9965 - val_loss: 0.0792 - val_acc: 0.9822
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0174 - acc: 0.9966 - val_loss: 0.0785 - val_acc: 0.9828
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 129s - loss: 0.0173 - acc: 0.9966 - val_loss: 0.0807 - val_acc: 0.9820
14:36:21 Best Xval loss epoch 0, value 0.072485
14:36:21 Finished (frozen)...
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
14:37:39 Train Accuracy 0.997, Train F1 0.937, f_score 0.952 (beta 0.667)
[[127380    352]
 [    80   3239]]
14:38:06 Xval Accuracy 0.982, Xval F1 0.673, f_score 0.693 (beta 0.667)
[[42117   479]
 [  294   794]]
Raw score 2 0.011446
14:38:06 Saving...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_2 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                42624     
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
=================================================================
Total params: 3,042,957.0
Trainable params: 3,042,957.0
Non-trainable params: 0.0
_________________________________________________________________
None
14:38:07 Starting (unfrozen)...
LSTM units 32
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 272s - loss: 0.2356 - acc: 0.9684 - val_loss: 0.1317 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 270s - loss: 0.1269 - acc: 0.9726 - val_loss: 0.1288 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 271s - loss: 0.1077 - acc: 0.9726 - val_loss: 0.0917 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 270s - loss: 0.0595 - acc: 0.9789 - val_loss: 0.0520 - val_acc: 0.9834
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 270s - loss: 0.0396 - acc: 0.9879 - val_loss: 0.0514 - val_acc: 0.9836
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 271s - loss: 0.0300 - acc: 0.9913 - val_loss: 0.0575 - val_acc: 0.9828
15:05:15 Best Xval loss epoch 4, value 0.051395
15:05:15 Using epoch 3, value 0.051395
15:05:15 Finished (unfrozen)...
LSTM units 32
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
15:07:02 Train Accuracy 0.995, Train F1 0.905, f_score 0.912 (beta 0.667)
[[127195    402]
 [   265   3189]]
15:07:37 Xval Accuracy 0.983, Xval F1 0.680, f_score 0.706 (beta 0.667)
[[42153   485]
 [  258   788]]
Raw score 2 0.012133
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_2 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                42624     
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
=================================================================
Total params: 3,042,957.0
Trainable params: 42,657.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
15:07:38 Continuing (frozen)...
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 226s - loss: 0.0202 - acc: 0.9949 - val_loss: 0.0669 - val_acc: 0.9807
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 226s - loss: 0.0185 - acc: 0.9952 - val_loss: 0.0722 - val_acc: 0.9816
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 225s - loss: 0.0177 - acc: 0.9954 - val_loss: 0.0775 - val_acc: 0.9814
15:18:58 Best Xval loss epoch 0, value 0.066860
15:18:58 Finished (frozen)...
LSTM units 32
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
15:20:46 Train Accuracy 0.996, Train F1 0.918, f_score 0.929 (beta 0.667)
[[127278    390]
 [   182   3201]]
15:21:22 Xval Accuracy 0.982, Xval F1 0.651, f_score 0.677 (beta 0.667)
[[42123   519]
 [  288   754]]
Raw score 2 0.010668
15:21:22 Saving...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_3 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                93440     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
=================================================================
Total params: 3,093,805.0
Trainable params: 3,093,805.0
Non-trainable params: 0.0
_________________________________________________________________
None
15:21:23 Starting (unfrozen)...
LSTM units 64
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 498s - loss: 0.1825 - acc: 0.9675 - val_loss: 0.1175 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 498s - loss: 0.0907 - acc: 0.9726 - val_loss: 0.0696 - val_acc: 0.9710
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 496s - loss: 0.0489 - acc: 0.9824 - val_loss: 0.0542 - val_acc: 0.9841
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 498s - loss: 0.0349 - acc: 0.9889 - val_loss: 0.0549 - val_acc: 0.9840
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 496s - loss: 0.0264 - acc: 0.9923 - val_loss: 0.0574 - val_acc: 0.9812
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 499s - loss: 0.0208 - acc: 0.9942 - val_loss: 0.0664 - val_acc: 0.9819
16:11:12 Best Xval loss epoch 2, value 0.054193
16:11:12 Using epoch 1, value 0.054193
16:11:12 Finished (unfrozen)...
LSTM units 64
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
16:14:07 Train Accuracy 0.996, Train F1 0.930, f_score 0.937 (beta 0.667)
[[127277    309]
 [   183   3282]]
16:15:05 Xval Accuracy 0.982, Xval F1 0.675, f_score 0.695 (beta 0.667)
[[42111   471]
 [  300   802]]
Raw score 2 0.011492
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_3 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                93440     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
=================================================================
Total params: 3,093,805.0
Trainable params: 93,505.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
16:15:05 Continuing (frozen)...
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 424s - loss: 0.0141 - acc: 0.9963 - val_loss: 0.0840 - val_acc: 0.9826
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 426s - loss: 0.0129 - acc: 0.9966 - val_loss: 0.0866 - val_acc: 0.9821
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 425s - loss: 0.0118 - acc: 0.9969 - val_loss: 0.0961 - val_acc: 0.9817
16:36:23 Best Xval loss epoch 0, value 0.084031
16:36:23 Finished (frozen)...
LSTM units 64
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
16:39:18 Train Accuracy 0.997, Train F1 0.947, f_score 0.952 (beta 0.667)
[[127313    226]
 [   147   3365]]
16:40:17 Xval Accuracy 0.982, Xval F1 0.660, f_score 0.686 (beta 0.667)
[[42128   506]
 [  283   767]]
Raw score 2 0.011080
16:40:17 Saving...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_4 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_4 (LSTM)                (None, 128)               219648    
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 129       
=================================================================
Total params: 3,220,077.0
Trainable params: 3,220,077.0
Non-trainable params: 0.0
_________________________________________________________________
None
16:40:19 Starting (unfrozen)...
LSTM units 128
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1035s - loss: 0.1561 - acc: 0.9705 - val_loss: 0.1287 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1036s - loss: 0.0938 - acc: 0.9740 - val_loss: 0.0528 - val_acc: 0.9813
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1034s - loss: 0.0413 - acc: 0.9864 - val_loss: 0.0487 - val_acc: 0.9843
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1034s - loss: 0.0308 - acc: 0.9901 - val_loss: 0.0542 - val_acc: 0.9829
17:49:22 Best Xval loss epoch 2, value 0.048697
17:49:22 Using epoch 1, value 0.048697
17:49:22 Finished (unfrozen)...
LSTM units 128
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
17:54:55 Train Accuracy 0.994, Train F1 0.891, f_score 0.901 (beta 0.667)
[[127178    480]
 [   282   3111]]
17:56:46 Xval Accuracy 0.983, Xval F1 0.678, f_score 0.709 (beta 0.667)
[[42173   498]
 [  238   775]]
Raw score 2 0.012293
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_4 (Embedding)      (None, 120, 300)          3000300   
_________________________________________________________________
lstm_4 (LSTM)                (None, 128)               219648    
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 129       
=================================================================
Total params: 3,220,077.0
Trainable params: 219,777.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
17:56:46 Continuing (frozen)...
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 909s - loss: 0.0211 - acc: 0.9943 - val_loss: 0.0673 - val_acc: 0.9821
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 907s - loss: 0.0200 - acc: 0.9944 - val_loss: 0.0689 - val_acc: 0.9818
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 909s - loss: 0.0189 - acc: 0.9948 - val_loss: 0.0752 - val_acc: 0.9820
18:42:14 Best Xval loss epoch 0, value 0.067327
18:42:14 Finished (frozen)...
LSTM units 128
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
18:47:47 Train Accuracy 0.995, Train F1 0.909, f_score 0.921 (beta 0.667)
[[127260    431]
 [   200   3160]]
18:49:38 Xval Accuracy 0.982, Xval F1 0.664, f_score 0.689 (beta 0.667)
[[42132   502]
 [  279   771]]
Raw score 2 0.011263
18:49:38 Saving...

In [20]:
# grid search - freeze embeddings - use pre-trained embeddings, train LSTM/sigmoid only, stop when xval loss reaches a minimum
embedding_vector_length = EMBEDDING_DIM

for sig_reg_penalty in [0.00003]:
    for dropout in [0.333]:
        for lstm_units in [16, 32, 64, 128]:
            for lstm_reg_penalty in [0.00000,]:
                #0.000001, 0.000003, 0.00001, 0.00003]:
                models = []
                xval_losses = []

                model = create_model(lstm_size=lstm_units, 
                                     lstm_reg_penalty=lstm_reg_penalty, 
                                     sigmoid_dropout=dropout, 
                                     sigmoid_reg_penalty=sig_reg_penalty,
                                     train_embed=False)
                print('%s Starting (frozen)...' % time.strftime("%H:%M:%S"))
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)

                ##################################################################
                # train end-to-end including embeddings until xval loss bottoms out
                ##################################################################
                
                epochs = 10
                for _ in range(epochs):
                    fit = model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=1, batch_size=1024)
                    # save loss
                    train_loss = fit.history['loss'][-1]
                    train_acc = fit.history['acc'][-1]
                    xval_loss = fit.history['val_loss'][-1]
                    xval_acc = fit.history['val_acc'][-1]
                    xval_losses.append(xval_loss)
                    models.append(copy.copy(model))

                    bestloss_index = np.argmin(xval_losses)
                    bestloss_value = xval_losses[bestloss_index]

                    # break if loss rises by 10% from best
                    if xval_loss / bestloss_value > 1.1:
                        break
                    
                # keep model from epoch with best xval loss
                print ("%s Best Xval loss epoch %d, value %f" % (time.strftime("%H:%M:%S"), bestloss_index, bestloss_value))
                
                model = models[bestloss_index]

                print('%s Finished (frozen)...' % time.strftime("%H:%M:%S"))
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)                
                
                y_train_prob = model.predict(X_train)
                
                beta=(2.0/3.0) # penalize false positives more than false negatives
                thresh, score = selectThreshold(y_train_prob, y_train, beta=beta)
                y_train_pred = y_train_prob >= thresh

                print("%s Train Accuracy %.3f, Train F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (time.strftime("%H:%M:%S"),
                       sklearn.metrics.accuracy_score(y_train_pred, y_train), 
                       sklearn.metrics.f1_score(y_train_pred, y_train),
                       score, beta))
                
                print(sklearn.metrics.confusion_matrix(y_train_pred, y_train))

                y_xval_prob = model.predict(X_xval)
                
                thresh, score = selectThreshold(y_xval_prob, y_xval, beta=beta)
                y_xval_pred = y_xval_prob >= thresh
                
                print("%s Xval Accuracy %.3f, Xval F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (time.strftime("%H:%M:%S"),
                       sklearn.metrics.accuracy_score(y_xval_pred, y_xval), 
                       sklearn.metrics.f1_score(y_xval_pred, y_xval),
                       score, beta))
                
                confusion_matrix = sklearn.metrics.confusion_matrix(y_xval_pred, y_xval)
                print(confusion_matrix)
                false_positive = confusion_matrix[1][0]
                false_negative = confusion_matrix[0][1]
                true_positive = confusion_matrix[1][1]
                raw_score = 1.0 * (true_positive - false_positive) / np.sum(confusion_matrix)
                print ("Raw score 2 %f" % raw_score)
                
                # save model to disk
                print('%s Saving...' % time.strftime("%H:%M:%S"))               
                modelname = "freeze_%d_%.6f_%.3f_%.6f" % (lstm_units, lstm_reg_penalty, dropout, sig_reg_penalty)
                model.save("%s.h5" % modelname)
                model.save_weights("%s_weights.h5" % modelname)
                with open("%s.json" % modelname, "wb") as fjson:
                    fjson.write(model.to_json())


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_10 (Embedding)     (None, 120, 300)          3000300   
_________________________________________________________________
lstm_10 (LSTM)               (None, 16)                20288     
_________________________________________________________________
dropout_10 (Dropout)         (None, 16)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 17        
=================================================================
Total params: 3,020,605.0
Trainable params: 20,305.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
22:23:52 Starting (unfrozen)...
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 135s - loss: 0.3728 - acc: 0.9682 - val_loss: 0.1550 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.1396 - acc: 0.9726 - val_loss: 0.1325 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.1319 - acc: 0.9726 - val_loss: 0.1282 - val_acc: 0.9709
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.1045 - acc: 0.9734 - val_loss: 0.0776 - val_acc: 0.9751
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.0734 - acc: 0.9779 - val_loss: 0.0650 - val_acc: 0.9793
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.0651 - acc: 0.9803 - val_loss: 0.0595 - val_acc: 0.9810
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.0593 - acc: 0.9817 - val_loss: 0.0557 - val_acc: 0.9814
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.0564 - acc: 0.9823 - val_loss: 0.0538 - val_acc: 0.9824
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.0538 - acc: 0.9828 - val_loss: 0.0531 - val_acc: 0.9827
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 133s - loss: 0.0522 - acc: 0.9837 - val_loss: 0.0508 - val_acc: 0.9829
22:46:12 Best Xval loss epoch 9, value 0.050836
22:46:12 Finished (unfrozen)...
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
22:47:34 Train Accuracy 0.984, Train F1 0.689, f_score 0.709 (beta 0.667)
[[126661   1282]
 [   799   2309]]
22:48:02 Xval Accuracy 0.983, Xval F1 0.666, f_score 0.710 (beta 0.667)
[[42219   541]
 [  192   732]]
Raw score 2 0.012362
22:48:02 Saving...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_11 (Embedding)     (None, 120, 300)          3000300   
_________________________________________________________________
lstm_11 (LSTM)               (None, 32)                42624     
_________________________________________________________________
dropout_11 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 33        
=================================================================
Total params: 3,042,957.0
Trainable params: 42,657.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
22:48:05 Starting (unfrozen)...
LSTM units 32
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 232s - loss: 0.2605 - acc: 0.9652 - val_loss: 0.1311 - val_acc: 0.9710
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.1245 - acc: 0.9726 - val_loss: 0.1213 - val_acc: 0.9710
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 229s - loss: 0.0974 - acc: 0.9738 - val_loss: 0.0711 - val_acc: 0.9762
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.0662 - acc: 0.9784 - val_loss: 0.0620 - val_acc: 0.9786
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.0599 - acc: 0.9800 - val_loss: 0.0571 - val_acc: 0.9803
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 229s - loss: 0.0555 - acc: 0.9814 - val_loss: 0.0536 - val_acc: 0.9816
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.0526 - acc: 0.9823 - val_loss: 0.0523 - val_acc: 0.9829
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.0510 - acc: 0.9830 - val_loss: 0.0501 - val_acc: 0.9831
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.0491 - acc: 0.9836 - val_loss: 0.0498 - val_acc: 0.9833
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 230s - loss: 0.0480 - acc: 0.9840 - val_loss: 0.0499 - val_acc: 0.9824
23:26:31 Best Xval loss epoch 8, value 0.049789
23:26:31 Finished (unfrozen)...
LSTM units 32
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
23:28:23 Train Accuracy 0.984, Train F1 0.682, f_score 0.714 (beta 0.667)
[[126812   1397]
 [   648   2194]]
23:29:01 Xval Accuracy 0.984, Xval F1 0.691, f_score 0.717 (beta 0.667)
[[42159   468]
 [  252   805]]
Raw score 2 0.012659
23:29:01 Saving...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_12 (Embedding)     (None, 120, 300)          3000300   
_________________________________________________________________
lstm_12 (LSTM)               (None, 64)                93440     
_________________________________________________________________
dropout_12 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 1)                 65        
=================================================================
Total params: 3,093,805.0
Trainable params: 93,505.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
23:29:04 Starting (unfrozen)...
LSTM units 64
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 435s - loss: 0.1882 - acc: 0.9698 - val_loss: 0.0825 - val_acc: 0.9752
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 434s - loss: 0.0677 - acc: 0.9780 - val_loss: 0.0595 - val_acc: 0.9793
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 434s - loss: 0.0563 - acc: 0.9808 - val_loss: 0.0540 - val_acc: 0.9816
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 435s - loss: 0.0515 - acc: 0.9828 - val_loss: 0.0519 - val_acc: 0.9822
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 537s - loss: 0.0498 - acc: 0.9832 - val_loss: 0.0505 - val_acc: 0.9832
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 544s - loss: 0.0487 - acc: 0.9838 - val_loss: 0.0497 - val_acc: 0.9828
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 534s - loss: 0.0478 - acc: 0.9841 - val_loss: 0.0487 - val_acc: 0.9834
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 542s - loss: 0.0467 - acc: 0.9845 - val_loss: 0.0485 - val_acc: 0.9835
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 543s - loss: 0.0465 - acc: 0.9843 - val_loss: 0.0484 - val_acc: 0.9834
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 545s - loss: 0.0454 - acc: 0.9848 - val_loss: 0.0485 - val_acc: 0.9832
00:52:14 Best Xval loss epoch 8, value 0.048431
00:52:14 Finished (unfrozen)...
LSTM units 64
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
00:56:09 Train Accuracy 0.986, Train F1 0.700, f_score 0.738 (beta 0.667)
[[126932   1371]
 [   528   2220]]
/home/ubuntu/anaconda2/envs/tensorflow/lib/python2.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in divide
  import sys
00:57:27 Xval Accuracy 0.984, Xval F1 0.682, f_score 0.720 (beta 0.667)
[[42207   509]
 [  204   764]]
Raw score 2 0.012819
00:57:27 Saving...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_13 (Embedding)     (None, 120, 300)          3000300   
_________________________________________________________________
lstm_13 (LSTM)               (None, 128)               219648    
_________________________________________________________________
dropout_13 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 129       
=================================================================
Total params: 3,220,077.0
Trainable params: 219,777.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
00:57:31 Starting (unfrozen)...
LSTM units 128
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1135s - loss: 0.1616 - acc: 0.9672 - val_loss: 0.1306 - val_acc: 0.9710
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1133s - loss: 0.1255 - acc: 0.9727 - val_loss: 0.1301 - val_acc: 0.9710
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1131s - loss: 0.1250 - acc: 0.9727 - val_loss: 0.1293 - val_acc: 0.9710
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1131s - loss: 0.0914 - acc: 0.9736 - val_loss: 0.0597 - val_acc: 0.9797
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1135s - loss: 0.0551 - acc: 0.9812 - val_loss: 0.0543 - val_acc: 0.9825
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1135s - loss: 0.0510 - acc: 0.9830 - val_loss: 0.0510 - val_acc: 0.9833
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1134s - loss: 0.0494 - acc: 0.9836 - val_loss: 0.0525 - val_acc: 0.9823
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1133s - loss: 0.0493 - acc: 0.9833 - val_loss: 0.0496 - val_acc: 0.9834
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1136s - loss: 0.0472 - acc: 0.9844 - val_loss: 0.0513 - val_acc: 0.9832
Train on 131051 samples, validate on 43684 samples
Epoch 1/1
131051/131051 [==============================] - 1134s - loss: 0.0470 - acc: 0.9844 - val_loss: 0.0485 - val_acc: 0.9838
04:06:35 Best Xval loss epoch 9, value 0.048461
04:06:35 Finished (unfrozen)...
LSTM units 128
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.3330
Sigmoid reg_penalty 0.00003000
04:14:16 Train Accuracy 0.985, Train F1 0.699, f_score 0.730 (beta 0.667)
[[126848   1332]
 [   612   2259]]
04:16:50 Xval Accuracy 0.984, Xval F1 0.679, f_score 0.723 (beta 0.667)
[[42232   526]
 [  179   747]]
Raw score 2 0.013002
04:16:50 Saving...

In [22]:
#load best model and evaluate in test set

modelname = "freeze_%d_%.6f_%.3f_%.6f" % (64, 0.000000, 0.333, 0.000030)

# doesn't work because of some custom metric BS
#keras.models.load_model("%s.h5" % modelname)

with open("%s.json" % modelname, 'r') as json_file:
    model_json = json_file.read()
model = model_from_json(model_json)
model.load_weights("%s_weights.h5" % modelname)
print("Loaded model from disk")


y_test_prob = model.predict(X_test)
beta=(2.0/3.0) # penalize false positives more than false negatives

y_test_pred = y_test_prob >= thresh
print("Test Accuracy %.3f, Test F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (sklearn.metrics.accuracy_score(y_test_pred, y_test), 
                       sklearn.metrics.f1_score(y_test_pred, y_test),
                       score, beta))
                
print(sklearn.metrics.confusion_matrix(y_test_pred, y_test))


Loaded model from disk
Test Accuracy 0.983, Test F1 0.639, f_score 0.723 (beta 0.667)
[[42291   593]
 [  146   654]]

In [ ]: