In [1]:
from __future__ import print_function
import collections
import numpy as np
import pandas as pd
import pickle
import sklearn
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.regularizers import L1L2
from sklearn.model_selection import train_test_split

# fix random seed for reproducibility
np.random.seed(7)

from pprint import pprint


Using TensorFlow backend.

In [2]:
# Global config variables 
model_name = "streeteye_lstm"
#data_file = "lstm_dump_test.txt"
data_file = "dump_2017_words.txt"

checkpoint_dir = "/home/ubuntu/mount/Notebooks/checkpoints"
tensorboard_dir ="/home/ubuntu/mount/Notebooks/tensorboard"

In [3]:
############################################################
# 1. load data
############################################################

# load dataset
print("Loading data...")
data=[]
y=[]

# count words
c = collections.Counter()

with open(data_file, "r") as infile:
    for line in infile:
        l = line.rstrip('\n').split(",")
        label = l.pop(0)
        # skip empty headlines
        if len(l[0]) == 0:
            continue
        if '' in l:
            l = [w for w in l if w]
        data.append(l)
        y.append(label)
        c.update(l)
        
print("Loaded data.")


Loading data...
Loaded data.

In [4]:
# create a list of top words        
vocabulary_size = 10000 # set this to have ~20 for least popular
count = [['UNK', -1]]
count.extend(c.most_common(vocabulary_size - 1))
print(count[:10])
print(count[-10:])


[['UNK', -1], ('domain_otherdomain', 119708), ('subsource_othersubsource', 47862), ('trump', 21141), ('with', 10761), ('domain_youtube.com', 8908), ('us', 8434), ('2017', 7862), ('from', 7768), ('subsource_memeorandum', 7712)]
[('hazard', 17), ('alexei', 17), ('molly', 17), ('expel', 17), ('champ', 17), ('admiral', 17), ('conversational', 17), ('memorable', 17), ('wharton', 17), ('torn', 17)]

In [5]:
dictionary = dict()
# map words into a dict of ints
for word, _ in count:
    dictionary[word] = len(dictionary)

data_embeddings=[]
unk_count = 0

for obs in data:
    embedlist = []
    for word in obs:
        if word in dictionary:
            index = dictionary[word]
        else:
            index = 0  # dictionary['UNK']
            unk_count = unk_count + 1
        embedlist.append(index)
    data_embeddings.append(embedlist)
        
count[0][1] = unk_count
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))

In [6]:
print(dictionary['trump'])
print(reverse_dictionary[3])


3
trump

In [8]:
%matplotlib inline
ls = (map(len, data_embeddings))
pd.DataFrame(ls).hist()


Out[8]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fa2c5724b50>]], dtype=object)

In [7]:
MAX_LENGTH = 120
X = sequence.pad_sequences(data_embeddings, maxlen=MAX_LENGTH)
X[0]
X.shape


Out[7]:
(218419, 120)

In [8]:
y=np.array(np.float32(y))

y=y.reshape((y.shape[0],1))
print(y.shape)
num_labels=1

num_obs, num_features = X.shape
print("Observations: %d\nFeatures: %d" % (num_obs, num_features))

# split into training, xval, test, 60/20/20
print("Split into training, temp")
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)
print("Split into xval, test")
X_xval, X_test, y_xval, y_test = train_test_split(X_temp, y_temp, test_size=0.5)


(218419, 1)
Observations: 218419
Features: 120
Split into training, temp
Split into xval, test

In [9]:
print("Training set")
print(X_train.shape)

print("Xval set")
print(X_xval.shape)

print("Test set")
print(X_test.shape)

num_training_samples = X_train.shape[0]
num_xval_samples = X_xval.shape[0]
num_test_samples = X_test.shape[0]

print ("\nTraining observations:  %d  \nXval observations:  %d  \nTest observations:  %d\n" % (num_training_samples, num_xval_samples, num_test_samples))


Training set
(131051, 120)
Xval set
(43684, 120)
Test set
(43684, 120)

Training observations:  131051  
Xval observations:  43684  
Test observations:  43684


In [10]:
# initialize embeddings to Google vals
pkl_file = open('embeddings.pkl', 'rb')
embeddings_dict, embeddings_reverse_dict, embeddings_data = pickle.load(pkl_file)

In [11]:
EMBEDDING_DIM=300

embedding_matrix = np.zeros((len(dictionary) + 1, EMBEDDING_DIM))

count = 0
for word, i in dictionary.items():
    #print(word)
    embed_i = embeddings_dict.get(word)
    if embed_i is not None:
        embedding_vector = embeddings_data[i]
        count +=1
        embedding_matrix[i] = embedding_vector
        
print("initialized %d embeddings" % count)


initialized 10000 embeddings

In [17]:
# function to generate model

def create_model(lstm_size=30, lstm_reg_penalty=0.0, sigmoid_dropout=(1.0/3.0), sigmoid_reg_penalty=0.0001):
    # create model
    model = Sequential()

    model.add(Embedding(len(dictionary) + 1, 
                        embedding_vector_length, 
                        weights=[embedding_matrix],
                        input_length=MAX_LENGTH,
                        trainable=False))
    
    # LSTM with lstm_size units
    model.add(LSTM(lstm_size,
                   kernel_regularizer=L1L2(l1=lstm_reg_penalty, l2=lstm_reg_penalty)))
    model.add(Dropout(sigmoid_dropout))
    model.add(Dense(1, 
                    activation='sigmoid',
                    kernel_initializer='TruncatedNormal', 
                    kernel_regularizer=L1L2(l1=sigmoid_reg_penalty, l2=sigmoid_reg_penalty)))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    return model

In [18]:
def selectThreshold (logits, labels, beta=(2.0/3)):
    # return threshold, f-score that yields best F-score
    # predict using true if >= threshold

    precision, recall, thresholds = sklearn.metrics.precision_recall_curve(labels, logits)
    bb = beta**2
    f1_scores = (1 + bb) * precision * recall / (bb * precision + recall)
    best_index = np.argmax(f1_scores)
    best_threshold = thresholds[best_index]
    best_score = f1_scores[best_index]
    return (best_threshold, best_score)

In [22]:
# create the model
embedding_vector_length = EMBEDDING_DIM

for sig_reg_penalty in [0.00003]:
    for dropout in [0.5]:
        for lstm_units in [16,]:
            #, 32, 64]:
            for lstm_reg_penalty in [0.00000,]:
                #0.000001, 0.000003, 0.00001, 0.00003]:
                model = create_model(lstm_size=lstm_units, 
                                     lstm_reg_penalty=lstm_reg_penalty, 
                                     sigmoid_dropout=dropout, 
                                     sigmoid_reg_penalty=sig_reg_penalty)
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)
                
                model.fit(X_train, y_train, validation_data=(X_xval, y_xval), epochs=100, batch_size=128)
                
                y_train_prob = model.predict(X_train)
                
                beta=(2.0/3.0) # penalize false positives more than false negatives
                thresh, score = selectThreshold(y_train_prob, y_train, beta=beta)
                y_train_pred = y_train_prob >= thresh
                
                print("Train Accuracy %.3f, Train F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (sklearn.metrics.accuracy_score(y_train_pred, y_train), 
                       sklearn.metrics.f1_score(y_train_pred, y_train),
                       score, beta))
                
                print(sklearn.metrics.confusion_matrix(y_train_pred, y_train))

                y_xval_prob = model.predict(X_xval)
                
                thresh, score = selectThreshold(y_xval_prob, y_xval, beta=beta)
                y_xval_pred = y_xval_prob >= thresh
                
                print ("LSTM units %d" % lstm_units)
                print ("LSTM reg_penalty %.8f" % lstm_reg_penalty)
                print ("Sigmoid dropout %.4f" %  dropout)
                print ("Sigmoid reg_penalty %.8f" % sig_reg_penalty)                

                print("Xval Accuracy %.3f, Xval F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (sklearn.metrics.accuracy_score(y_xval_pred, y_xval), 
                       sklearn.metrics.f1_score(y_xval_pred, y_xval),
                       score, beta))
                
                confusion_matrix = sklearn.metrics.confusion_matrix(y_xval_pred, y_xval)
                print(confusion_matrix)
                false_positive = confusion_matrix[1][0]
                false_negative = confusion_matrix[0][1]
                raw_score = (2.0*false_positive + false_negative) / np.sum(confusion_matrix)
                    
                print ("Raw score %f" % raw_score)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_22 (Embedding)     (None, 120, 300)          3000300   
_________________________________________________________________
lstm_22 (LSTM)               (None, 16)                20288     
_________________________________________________________________
dropout_22 (Dropout)         (None, 16)                0         
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 17        
=================================================================
Total params: 3,020,605.0
Trainable params: 20,305.0
Non-trainable params: 3,000,300.0
_________________________________________________________________
None
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.5000
Sigmoid reg_penalty 0.00003000
Train on 131051 samples, validate on 43684 samples
Epoch 1/100
131051/131051 [==============================] - 229s - loss: 0.1207 - acc: 0.9755 - val_loss: 0.0596 - val_acc: 0.9803
Epoch 2/100
131051/131051 [==============================] - 319s - loss: 0.0610 - acc: 0.9813 - val_loss: 0.0534 - val_acc: 0.9825
Epoch 3/100
131051/131051 [==============================] - 318s - loss: 0.0549 - acc: 0.9827 - val_loss: 0.0509 - val_acc: 0.9831
Epoch 4/100
131051/131051 [==============================] - 318s - loss: 0.0522 - acc: 0.9838 - val_loss: 0.0500 - val_acc: 0.9832
Epoch 5/100
131051/131051 [==============================] - 293s - loss: 0.0504 - acc: 0.9841 - val_loss: 0.0505 - val_acc: 0.9835
Epoch 6/100
131051/131051 [==============================] - 264s - loss: 0.0491 - acc: 0.9845 - val_loss: 0.0483 - val_acc: 0.9837
Epoch 7/100
131051/131051 [==============================] - 318s - loss: 0.0479 - acc: 0.9849 - val_loss: 0.0484 - val_acc: 0.9835
Epoch 8/100
131051/131051 [==============================] - 318s - loss: 0.0472 - acc: 0.9852 - val_loss: 0.0482 - val_acc: 0.9836
Epoch 9/100
131051/131051 [==============================] - 316s - loss: 0.0461 - acc: 0.9852 - val_loss: 0.0482 - val_acc: 0.9837
Epoch 10/100
131051/131051 [==============================] - 295s - loss: 0.0450 - acc: 0.9855 - val_loss: 0.0485 - val_acc: 0.9835
Epoch 11/100
131051/131051 [==============================] - 196s - loss: 0.0436 - acc: 0.9857 - val_loss: 0.0491 - val_acc: 0.9833
Epoch 12/100
131051/131051 [==============================] - 182s - loss: 0.0434 - acc: 0.9861 - val_loss: 0.0487 - val_acc: 0.9840
Epoch 13/100
131051/131051 [==============================] - 181s - loss: 0.0424 - acc: 0.9861 - val_loss: 0.0484 - val_acc: 0.9837
Epoch 14/100
131051/131051 [==============================] - 182s - loss: 0.0417 - acc: 0.9862 - val_loss: 0.0498 - val_acc: 0.9838
Epoch 15/100
131051/131051 [==============================] - 181s - loss: 0.0409 - acc: 0.9863 - val_loss: 0.0488 - val_acc: 0.9838
Epoch 16/100
131051/131051 [==============================] - 182s - loss: 0.0399 - acc: 0.9868 - val_loss: 0.0505 - val_acc: 0.9840
Epoch 17/100
131051/131051 [==============================] - 182s - loss: 0.0388 - acc: 0.9872 - val_loss: 0.0494 - val_acc: 0.9833
Epoch 18/100
131051/131051 [==============================] - 182s - loss: 0.0385 - acc: 0.9874 - val_loss: 0.0500 - val_acc: 0.9836
Epoch 19/100
131051/131051 [==============================] - 182s - loss: 0.0378 - acc: 0.9874 - val_loss: 0.0515 - val_acc: 0.9836
Epoch 20/100
131051/131051 [==============================] - 181s - loss: 0.0370 - acc: 0.9880 - val_loss: 0.0534 - val_acc: 0.9828
Epoch 21/100
131051/131051 [==============================] - 181s - loss: 0.0362 - acc: 0.9880 - val_loss: 0.0573 - val_acc: 0.9839
Epoch 22/100
131051/131051 [==============================] - 182s - loss: 0.0363 - acc: 0.9879 - val_loss: 0.0534 - val_acc: 0.9826
Epoch 23/100
131051/131051 [==============================] - 181s - loss: 0.0350 - acc: 0.9883 - val_loss: 0.0532 - val_acc: 0.9832
Epoch 24/100
131051/131051 [==============================] - 182s - loss: 0.0344 - acc: 0.9883 - val_loss: 0.0644 - val_acc: 0.9814
Epoch 25/100
131051/131051 [==============================] - 182s - loss: 0.0336 - acc: 0.9889 - val_loss: 0.0567 - val_acc: 0.9835
Epoch 26/100
131051/131051 [==============================] - 181s - loss: 0.0329 - acc: 0.9889 - val_loss: 0.0559 - val_acc: 0.9835
Epoch 27/100
131051/131051 [==============================] - 181s - loss: 0.0321 - acc: 0.9891 - val_loss: 0.0550 - val_acc: 0.9831
Epoch 28/100
131051/131051 [==============================] - 182s - loss: 0.0313 - acc: 0.9894 - val_loss: 0.0593 - val_acc: 0.9832
Epoch 29/100
131051/131051 [==============================] - 182s - loss: 0.0306 - acc: 0.9897 - val_loss: 0.0592 - val_acc: 0.9822
Epoch 30/100
131051/131051 [==============================] - 182s - loss: 0.0303 - acc: 0.9898 - val_loss: 0.0619 - val_acc: 0.9823
Epoch 31/100
131051/131051 [==============================] - 182s - loss: 0.0295 - acc: 0.9903 - val_loss: 0.0631 - val_acc: 0.9832
Epoch 32/100
131051/131051 [==============================] - 182s - loss: 0.0297 - acc: 0.9899 - val_loss: 0.0636 - val_acc: 0.9826
Epoch 33/100
131051/131051 [==============================] - 181s - loss: 0.0277 - acc: 0.9908 - val_loss: 0.0654 - val_acc: 0.9822
Epoch 34/100
131051/131051 [==============================] - 182s - loss: 0.0274 - acc: 0.9910 - val_loss: 0.0674 - val_acc: 0.9831
Epoch 35/100
131051/131051 [==============================] - 181s - loss: 0.0262 - acc: 0.9915 - val_loss: 0.0661 - val_acc: 0.9830
Epoch 36/100
131051/131051 [==============================] - 181s - loss: 0.0261 - acc: 0.9914 - val_loss: 0.0670 - val_acc: 0.9811
Epoch 37/100
131051/131051 [==============================] - 181s - loss: 0.0255 - acc: 0.9916 - val_loss: 0.0730 - val_acc: 0.9828
Epoch 38/100
131051/131051 [==============================] - 181s - loss: 0.0255 - acc: 0.9917 - val_loss: 0.0717 - val_acc: 0.9820
Epoch 39/100
131051/131051 [==============================] - 182s - loss: 0.0244 - acc: 0.9923 - val_loss: 0.0777 - val_acc: 0.9830
Epoch 40/100
131051/131051 [==============================] - 183s - loss: 0.0241 - acc: 0.9918 - val_loss: 0.0740 - val_acc: 0.9815
Epoch 41/100
131051/131051 [==============================] - 181s - loss: 0.0234 - acc: 0.9923 - val_loss: 0.0710 - val_acc: 0.9821
Epoch 42/100
131051/131051 [==============================] - 181s - loss: 0.0225 - acc: 0.9928 - val_loss: 0.0853 - val_acc: 0.9824
Epoch 43/100
131051/131051 [==============================] - 181s - loss: 0.0248 - acc: 0.9917 - val_loss: 0.0753 - val_acc: 0.9825
Epoch 44/100
131051/131051 [==============================] - 181s - loss: 0.0217 - acc: 0.9932 - val_loss: 0.0762 - val_acc: 0.9814
Epoch 45/100
131051/131051 [==============================] - 181s - loss: 0.0210 - acc: 0.9931 - val_loss: 0.0858 - val_acc: 0.9807
Epoch 46/100
131051/131051 [==============================] - 181s - loss: 0.0211 - acc: 0.9935 - val_loss: 0.0796 - val_acc: 0.9820
Epoch 47/100
131051/131051 [==============================] - 182s - loss: 0.0202 - acc: 0.9938 - val_loss: 0.0866 - val_acc: 0.9813
Epoch 48/100
131051/131051 [==============================] - 181s - loss: 0.0198 - acc: 0.9936 - val_loss: 0.0862 - val_acc: 0.9810
Epoch 49/100
131051/131051 [==============================] - 181s - loss: 0.0193 - acc: 0.9939 - val_loss: 0.0921 - val_acc: 0.9814
Epoch 50/100
131051/131051 [==============================] - 182s - loss: 0.0196 - acc: 0.9937 - val_loss: 0.0879 - val_acc: 0.9807
Epoch 51/100
131051/131051 [==============================] - 182s - loss: 0.0186 - acc: 0.9940 - val_loss: 0.0920 - val_acc: 0.9811
Epoch 52/100
131051/131051 [==============================] - 181s - loss: 0.0182 - acc: 0.9944 - val_loss: 0.0922 - val_acc: 0.9820
Epoch 53/100
131051/131051 [==============================] - 182s - loss: 0.0174 - acc: 0.9948 - val_loss: 0.0985 - val_acc: 0.9815
Epoch 54/100
131051/131051 [==============================] - 182s - loss: 0.0173 - acc: 0.9949 - val_loss: 0.0936 - val_acc: 0.9802
Epoch 55/100
131051/131051 [==============================] - 183s - loss: 0.0193 - acc: 0.9942 - val_loss: 0.0833 - val_acc: 0.9796
Epoch 56/100
131051/131051 [==============================] - 182s - loss: 0.0172 - acc: 0.9949 - val_loss: 0.0994 - val_acc: 0.9816
Epoch 57/100
131051/131051 [==============================] - 183s - loss: 0.0161 - acc: 0.9953 - val_loss: 0.0932 - val_acc: 0.9784
Epoch 58/100
131051/131051 [==============================] - 182s - loss: 0.0163 - acc: 0.9952 - val_loss: 0.0956 - val_acc: 0.9811
Epoch 59/100
131051/131051 [==============================] - 183s - loss: 0.0161 - acc: 0.9952 - val_loss: 0.1003 - val_acc: 0.9802
Epoch 60/100
131051/131051 [==============================] - 182s - loss: 0.0152 - acc: 0.9955 - val_loss: 0.1010 - val_acc: 0.9807
Epoch 61/100
131051/131051 [==============================] - 182s - loss: 0.0149 - acc: 0.9958 - val_loss: 0.1039 - val_acc: 0.9808
Epoch 62/100
131051/131051 [==============================] - 181s - loss: 0.0153 - acc: 0.9955 - val_loss: 0.0980 - val_acc: 0.9816
Epoch 63/100
131051/131051 [==============================] - 182s - loss: 0.0142 - acc: 0.9961 - val_loss: 0.1078 - val_acc: 0.9800
Epoch 64/100
131051/131051 [==============================] - 182s - loss: 0.0143 - acc: 0.9960 - val_loss: 0.1094 - val_acc: 0.9799
Epoch 65/100
131051/131051 [==============================] - 182s - loss: 0.0141 - acc: 0.9961 - val_loss: 0.1125 - val_acc: 0.9806
Epoch 66/100
131051/131051 [==============================] - 182s - loss: 0.0131 - acc: 0.9962 - val_loss: 0.1099 - val_acc: 0.9799
Epoch 67/100
131051/131051 [==============================] - 182s - loss: 0.0135 - acc: 0.9962 - val_loss: 0.1135 - val_acc: 0.9810
Epoch 68/100
131051/131051 [==============================] - 182s - loss: 0.0136 - acc: 0.9961 - val_loss: 0.1146 - val_acc: 0.9790
Epoch 69/100
131051/131051 [==============================] - 182s - loss: 0.0131 - acc: 0.9964 - val_loss: 0.1121 - val_acc: 0.9800
Epoch 70/100
131051/131051 [==============================] - 182s - loss: 0.0141 - acc: 0.9965 - val_loss: 0.1042 - val_acc: 0.9803
Epoch 71/100
131051/131051 [==============================] - 183s - loss: 0.0130 - acc: 0.9966 - val_loss: 0.1117 - val_acc: 0.9802
Epoch 72/100
131051/131051 [==============================] - 182s - loss: 0.0122 - acc: 0.9967 - val_loss: 0.1123 - val_acc: 0.9804
Epoch 73/100
131051/131051 [==============================] - 182s - loss: 0.0135 - acc: 0.9964 - val_loss: 0.1137 - val_acc: 0.9809
Epoch 74/100
131051/131051 [==============================] - 183s - loss: 0.0125 - acc: 0.9968 - val_loss: 0.1122 - val_acc: 0.9796
Epoch 75/100
131051/131051 [==============================] - 182s - loss: 0.0118 - acc: 0.9970 - val_loss: 0.1141 - val_acc: 0.9810
Epoch 76/100
131051/131051 [==============================] - 181s - loss: 0.0106 - acc: 0.9974 - val_loss: 0.1237 - val_acc: 0.9816
Epoch 77/100
131051/131051 [==============================] - 181s - loss: 0.0115 - acc: 0.9971 - val_loss: 0.1186 - val_acc: 0.9812
Epoch 78/100
131051/131051 [==============================] - 182s - loss: 0.0127 - acc: 0.9967 - val_loss: 0.1116 - val_acc: 0.9807
Epoch 79/100
131051/131051 [==============================] - 182s - loss: 0.0115 - acc: 0.9972 - val_loss: 0.1185 - val_acc: 0.9804
Epoch 80/100
131051/131051 [==============================] - 182s - loss: 0.0134 - acc: 0.9965 - val_loss: 0.1140 - val_acc: 0.9808
Epoch 81/100
131051/131051 [==============================] - 182s - loss: 0.0105 - acc: 0.9974 - val_loss: 0.1226 - val_acc: 0.9793
Epoch 82/100
131051/131051 [==============================] - 183s - loss: 0.0103 - acc: 0.9974 - val_loss: 0.1253 - val_acc: 0.9794
Epoch 83/100
131051/131051 [==============================] - 182s - loss: 0.0104 - acc: 0.9975 - val_loss: 0.1268 - val_acc: 0.9800
Epoch 84/100
131051/131051 [==============================] - 183s - loss: 0.0112 - acc: 0.9971 - val_loss: 0.1241 - val_acc: 0.9801
Epoch 85/100
131051/131051 [==============================] - 183s - loss: 0.0113 - acc: 0.9973 - val_loss: 0.1202 - val_acc: 0.9798
Epoch 86/100
131051/131051 [==============================] - 183s - loss: 0.0108 - acc: 0.9974 - val_loss: 0.1188 - val_acc: 0.9805
Epoch 87/100
131051/131051 [==============================] - 183s - loss: 0.0096 - acc: 0.9976 - val_loss: 0.1279 - val_acc: 0.9805
Epoch 88/100
131051/131051 [==============================] - 183s - loss: 0.0098 - acc: 0.9976 - val_loss: 0.1241 - val_acc: 0.9794
Epoch 89/100
131051/131051 [==============================] - 184s - loss: 0.0094 - acc: 0.9978 - val_loss: 0.1316 - val_acc: 0.9799
Epoch 90/100
131051/131051 [==============================] - 183s - loss: 0.0115 - acc: 0.9972 - val_loss: 0.1219 - val_acc: 0.9801
Epoch 91/100
131051/131051 [==============================] - 183s - loss: 0.0089 - acc: 0.9979 - val_loss: 0.1347 - val_acc: 0.9804
Epoch 92/100
131051/131051 [==============================] - 184s - loss: 0.0119 - acc: 0.9972 - val_loss: 0.1200 - val_acc: 0.9804
Epoch 93/100
131051/131051 [==============================] - 184s - loss: 0.0089 - acc: 0.9980 - val_loss: 0.1342 - val_acc: 0.9794
Epoch 94/100
131051/131051 [==============================] - 183s - loss: 0.0096 - acc: 0.9978 - val_loss: 0.1278 - val_acc: 0.9800
Epoch 95/100
131051/131051 [==============================] - 183s - loss: 0.0087 - acc: 0.9982 - val_loss: 0.1309 - val_acc: 0.9804
Epoch 96/100
131051/131051 [==============================] - 183s - loss: 0.0098 - acc: 0.9979 - val_loss: 0.1332 - val_acc: 0.9788
Epoch 97/100
131051/131051 [==============================] - 183s - loss: 0.0095 - acc: 0.9977 - val_loss: 0.1262 - val_acc: 0.9801
Epoch 98/100
131051/131051 [==============================] - 183s - loss: 0.0095 - acc: 0.9977 - val_loss: 0.1300 - val_acc: 0.9799
Epoch 99/100
131051/131051 [==============================] - 184s - loss: 0.0089 - acc: 0.9980 - val_loss: 0.1355 - val_acc: 0.9794
Epoch 100/100
131051/131051 [==============================] - 183s - loss: 0.0095 - acc: 0.9979 - val_loss: 0.1326 - val_acc: 0.9793
Train Accuracy 0.999, Train F1 0.975, f_score 0.981 (beta 0.667)
[[127430    144]
 [    30   3447]]
LSTM units 16
LSTM reg_penalty 0.00000000
Sigmoid dropout 0.5000
Sigmoid reg_penalty 0.00003000
Xval Accuracy 0.981, Xval F1 0.627, f_score 0.670 (beta 0.667)
[[42188   590]
 [  223   683]]
Raw score 0.023716

In [15]:
y_train_prob = model.predict(X_train)
beta=(2.0/3.0) # penalize false positives more than false negatives

thresh, score = selectThreshold(y_train_prob, y_train, beta=beta)
y_train_pred = y_train_prob >= thresh
print("Train Accuracy %.3f, Train F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (sklearn.metrics.accuracy_score(y_train_pred, y_train), 
                       sklearn.metrics.f1_score(y_train_pred, y_train),
                       score, beta))
                
print(sklearn.metrics.confusion_matrix(y_train_pred, y_train))


Train Accuracy 0.996, Train F1 0.931, f_score 0.944 (beta 0.667)
[[127340    358]
 [   120   3233]]

In [16]:
y_xval_prob = model.predict(X_xval)
                
                thresh, score = selectThreshold(y_xval_prob, y_xval, beta=beta)
                y_xval_pred = y_xval_prob >= thresh

                print("Xval Accuracy %.3f, Xval F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (sklearn.metrics.accuracy_score(y_xval_pred, y_xval), 
                       sklearn.metrics.f1_score(y_xval_pred, y_xval),
                       score, beta))
                
                print(sklearn.metrics.confusion_matrix(y_xval_pred, y_xval))


Xval Accuracy 0.971, Xval F1 0.000, f_score nan (beta 0.667)
[[42410  1273]
 [    1     0]]
/home/ubuntu/anaconda2/envs/tensorflow/lib/python2.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in divide
  import sys

In [17]:
y_test_prob = model.predict(X_test)
beta=(2.0/3.0) # penalize false positives more than false negatives

y_test_pred = y_test_prob >= thresh
print("Test Accuracy %.3f, Test F1 %.3f, f_score %.3f (beta %.3f)" % 
                      (sklearn.metrics.accuracy_score(y_test_pred, y_test), 
                       sklearn.metrics.f1_score(y_test_pred, y_test),
                       score, beta))
                
print(sklearn.metrics.confusion_matrix(y_test_pred, y_test))


Test Accuracy 0.971, Test F1 0.002, f_score nan (beta 0.667)
[[42437  1246]
 [    0     1]]

In [ ]: