All experiments below use the pretrained GoogleNews-vectors-negative300.bin word2vec embeddings.

LSTM(64) + DENSE(64)
weights.003-0.2828.hdf5
193s - loss: 0.2478 - acc: 0.8197 - val_loss: 0.2828 - val_acc: 0.7996

LSTM(64) + DENSE(64), BATCH_SIZE 256 ==> 2048
weights.011-0.2908.hdf5
63s - loss: 0.2398 - acc: 0.8276 - val_loss: 0.2908 - val_acc: 0.8005

LSTM(64) + DENSE(64), BATCH_SIZE 256 ==> 64
weights.002-0.2822.hdf5
759s - loss: 0.2390 - acc: 0.8283 - val_loss: 0.2822 - val_acc: 0.8134

LSTM(64) + DENSE(64), BATCH_SIZE 256 ==> 32
weights.001-0.2812.hdf5
1541s - loss: 0.2622 - acc: 0.8065 - val_loss: 0.2812 - val_acc: 0.8008
import requests

def download_file_from_google_drive(id, destination):
    """Download a (possibly large) file from Google Drive, handling the confirmation page."""
    URL = "https://docs.google.com/uc?export=download"
    session = requests.Session()
    response = session.get(URL, params={'id': id}, stream=True)
    token = get_confirm_token(response)
    if token:
        params = {'id': id, 'confirm': token}
        response = session.get(URL, params=params, stream=True)
    save_response_content(response, destination)

def get_confirm_token(response):
    # Google Drive sets a download_warning cookie for files too large to virus-scan
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)

file_id = '0B7XkCwpI5KDYNlNUTTlSS21pQmM'
destination = '../data/GoogleNews-vectors-negative300.bin.gz'
download_file_from_google_drive(file_id, destination)
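The downloaded archive then has to be decompressed and loaded. A minimal sketch, assuming gensim is used for loading (the actual loading code is not shown in this excerpt):

import gzip
import shutil
from gensim.models import KeyedVectors

# Decompress the .gz archive next to it
with gzip.open('../data/GoogleNews-vectors-negative300.bin.gz', 'rb') as src, \
     open('../data/GoogleNews-vectors-negative300.bin', 'wb') as dst:
    shutil.copyfileobj(src, dst)

# Load the 300-dimensional word2vec vectors (binary format)
word2vec = KeyedVectors.load_word2vec_format(
    '../data/GoogleNews-vectors-negative300.bin', binary=True)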
# Check for any null values
# inds = pd.isnull(trainval_df).any(1).nonzero()[0]
# trainval_df.loc[inds]
# inds = pd.isnull(test_df).any(1).nonzero()[0]
# test_df.loc[inds]
# # Add the string 'empty' to empty strings
# trainval_df = trainval_df.fillna('empty')
# test_df = test_df.fillna('empty')
trainval_df['len1'] = trainval_df.apply(lambda row: len(row['question1_WL'].split()), axis=1)
trainval_df['len2'] = trainval_df.apply(lambda row: len(row['question2_WL'].split()), axis=1)
test_df['len1'] = test_df.apply(lambda row: len(row['question1_WL'].split()), axis=1)
test_df['len2'] = test_df.apply(lambda row: len(row['question2_WL'].split()), axis=1)
lengths = pd.concat([trainval_df['len1'],trainval_df['len2']], axis=0)
print(lengths.describe())
print(np.percentile(lengths, 99.0))
print(np.percentile(lengths, 99.4))
print(np.percentile(lengths, 99.5))
print(np.percentile(lengths, 99.9))
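These percentiles guide the choice of the maximum sequence length; the model summaries below use an input length of 40. A minimal padding sketch, assuming a Keras tokenizer has already been fit on the questions (q1_seqs/q2_seqs are hypothetical names):

from keras.preprocessing.sequence import pad_sequences

MAX_LEN = 40  # matches the input_length of 40 shown in the model summaries below

# Integer-encode the cleaned questions and pad/truncate them to MAX_LEN
q1_seqs = pad_sequences(tokenizer.texts_to_sequences(trainval_df['question1_WL']), maxlen=MAX_LEN)
q2_seqs = pad_sequences(tokenizer.texts_to_sequences(trainval_df['question2_WL']), maxlen=MAX_LEN)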
Loading from file trainval_df.pickle
Loading from file test_df.pickle
Loading from file GoogleNews-vectors-negative300.bin.word_embedding_matrix.pickle
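The pickled word_embedding_matrix is presumably built by looking up every tokenizer word in the GoogleNews vectors; a rough sketch of that construction (an assumption, reusing the word2vec object loaded above):

import numpy as np

EMBEDDING_DIM = 300
nb_words = len(tokenizer.word_index)  # assumption: one row per tokenizer word

# Row 0 is reserved for padding; words missing from word2vec keep an all-zero row
word_embedding_matrix = np.zeros((nb_words + 1, EMBEDDING_DIM))
for word, idx in tokenizer.word_index.items():
    if word in word2vec.vocab:  # .key_to_index in gensim >= 4
        word_embedding_matrix[idx] = word2vec[word]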
word_counts = tokenizer.word_counts
null_embedding_word_counts = { word: word_counts[word] for word in null_embedding_words }
print(sum(null_embedding_word_counts.values())) #454210
word_docs = tokenizer.word_docs
null_embedding_word_docs = { word: word_docs[word] for word in null_embedding_words }
print(sum(null_embedding_word_docs.values())) #446584
# 446584 / ((404290 + 2345796) * 2) ≈ 0.0812: the summed document counts of the
# unembedded words amount to roughly 8% of the total number of questions
# (404,290 train pairs + 2,345,796 test pairs, two questions per pair)
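null_embedding_words itself is not defined in this excerpt; presumably it collects the tokens whose embedding row stayed all-zero, i.e. words absent from the GoogleNews vocabulary:

# Hypothetical reconstruction of null_embedding_words
null_embedding_words = [word for word, idx in tokenizer.word_index.items()
                        if idx <= nb_words and not word_embedding_matrix[idx].any()]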
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_4 (Embedding) (None, 40, 300) 37906800
_________________________________________________________________
lstm_4 (LSTM) (None, 64) 93440
=================================================================
Total params: 38,000,240.0
Trainable params: 93,440.0
Non-trainable params: 37,906,800.0
_________________________________________________________________
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
q1 (InputLayer) (None, 40) 0
____________________________________________________________________________________________________
q2 (InputLayer) (None, 40) 0
____________________________________________________________________________________________________
sequential_8 (Sequential) (None, 64) 38000240
____________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 128) 0
____________________________________________________________________________________________________
dropout_7 (Dropout) (None, 128) 0
____________________________________________________________________________________________________
dense_7 (Dense) (None, 64) 8256
____________________________________________________________________________________________________
dropout_8 (Dropout) (None, 64) 0
____________________________________________________________________________________________________
dense_8 (Dense) (None, 1) 65
====================================================================================================
Total params: 38,008,561.0
Trainable params: 101,761.0
Non-trainable params: 37,906,800.0
____________________________________________________________________________________________________
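The code that builds this baseline model is not shown above. The following sketch is consistent with the two summaries (layer sizes and parameter counts match); the dropout rate, hidden activation (relu) and optimizer are assumptions, while the sigmoid output and binary_crossentropy loss follow from the training setup used later:

from keras.models import Sequential, Model
from keras.layers import Input, Embedding, LSTM, Dense, Dropout, concatenate

# Shared encoder: frozen GoogleNews embeddings followed by LSTM(64)
encoder = Sequential()
encoder.add(Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix],
                      input_length=MAX_LEN, trainable=False))
encoder.add(LSTM(64))

q1 = Input(shape=(MAX_LEN,), name='q1')
q2 = Input(shape=(MAX_LEN,), name='q2')

# Encode both questions with the same weights, then classify the concatenated pair
merged = concatenate([encoder(q1), encoder(q2)])
merged = Dropout(0.5)(merged)                  # dropout rate is an assumption
merged = Dense(64, activation='relu')(merged)
merged = Dropout(0.5)(merged)
output = Dense(1, activation='sigmoid')(merged)

model = Model(inputs=[q1, q2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])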
BATCH_SIZE: 32
Train on 770048 samples, validate on 38532 samples
Epoch 1/100
Epoch 00000: val_loss improved from inf to 0.29426, saving model to ./checkpoint/weights.000-0.2943.hdf5
1542s - loss: 0.3256 - acc: 0.7399 - val_loss: 0.2943 - val_acc: 0.7728
Epoch 2/100
Epoch 00001: val_loss improved from 0.29426 to 0.28122, saving model to ./checkpoint/weights.001-0.2812.hdf5
1541s - loss: 0.2622 - acc: 0.8065 - val_loss: 0.2812 - val_acc: 0.8008
Epoch 3/100
Epoch 00002: val_loss did not improve
1544s - loss: 0.2295 - acc: 0.8377 - val_loss: 0.2914 - val_acc: 0.8147
Epoch 4/100
#resume training
model, model_name = get_best_model()
# model = load_model(CHECKPOINT_DIR + 'weights.025-0.4508.hdf5')
# model_name = 'weights.025-0.4508.hdf5'
# print('model_name', model_name)
# #try increasing learningrate
# optimizer = Adam(lr=1e-4)
# model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
# callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1),
# EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1),
# ModelCheckpoint(filepath=CHECKPOINT_DIR+'weights.{epoch:03d}-{val_loss:.4f}.hdf5', monitor='val_loss', verbose=1, save_best_only=True),
# TensorBoard(log_dir=LOG_DIR, histogram_freq=0, write_graph=False, write_images=True)]
# Resume from the epoch after the loaded checkpoint (its index is encoded in the filename)
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1
print('BATCH_SIZE:', BATCH_SIZE)
model.fit({'q1': train_q1_Double, 'q2': train_q2_Double}, y_train_Double,
          batch_size=BATCH_SIZE, epochs=100, verbose=2, callbacks=callbacks,
          validation_data=({'q1': valid_q1_Double, 'q2': valid_q2_Double}, y_valid_Double, val_sample_weights),
          shuffle=True, class_weight=class_weight, initial_epoch=initial_epoch)
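get_best_model() is not defined in this excerpt; a plausible implementation picks the checkpoint with the lowest val_loss encoded in its filename:

import glob, os
from keras.models import load_model

def get_best_model(checkpoint_dir=CHECKPOINT_DIR):
    # Checkpoints are named weights.{epoch:03d}-{val_loss:.4f}.hdf5, so the embedded
    # val_loss can be parsed out of the filename to select the best one.
    paths = glob.glob(os.path.join(checkpoint_dir, 'weights.*.hdf5'))
    best_path = min(paths,
                    key=lambda p: float(os.path.basename(p).split('-')[1].replace('.hdf5', '')))
    return load_model(best_path), os.path.basename(best_path)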
model_name weights.002-0.2822.hdf5
[0.28221574697640039, 0.81337590717791675]  # model.evaluate on the validation set: [loss, accuracy]
Baseline_weights.002-0.2822.hdf5_LSTM64*1_DENSE64*1_valloss0.2822.csv
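The submission file above is presumably produced from test-set predictions. A minimal sketch, assuming padded test sequences test_q1/test_q2 (hypothetical names) and the Kaggle submission format with test_id and is_duplicate columns:

predictions = model.predict({'q1': test_q1, 'q2': test_q2}, batch_size=BATCH_SIZE, verbose=1)
submission = pd.DataFrame({'test_id': test_df['test_id'],
                           'is_duplicate': predictions.ravel()})[['test_id', 'is_duplicate']]
submission.to_csv(OUTPUT_DIR + 'Baseline_' + model_name + '_LSTM64*1_DENSE64*1_valloss0.2822.csv',
                  index=False)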
sys.stdout = open(OUTPUT_DIR+'training_output.txt', 'a')
history = model.fit({'q1': train_q1, 'q2': train_q2}, y_train, batch_size=BATCH_SIZE, epochs=3, verbose=2, callbacks=callbacks,
validation_data=({'q1': valid_q1, 'q2': valid_q2}, y_valid), shuffle=True, initial_epoch=0)
sys.stdout = sys.__stdout__
summary_stats = pd.DataFrame({'epoch': [ i + 1 for i in history.epoch ],
'train_acc': history.history['acc'],
'valid_acc': history.history['val_acc'],
'train_loss': history.history['loss'],
'valid_loss': history.history['val_loss']})
summary_stats
plt.plot(summary_stats.train_loss) # blue
plt.plot(summary_stats.valid_loss) # green
plt.show()
units = 128 # Number of nodes in the Dense layers
dropout = 0.25 # Fraction of units to drop in the Dropout layers
nb_filter = 32 # Number of filters to use in Convolution1D
filter_length = 3 # Length of filter for Convolution1D
# Initialize weights and biases for the Dense layers
weights = initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=2)
bias = 'zeros' # Initializer for the Dense bias vectors
model1 = Sequential()
model1.add(Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix], input_length = MAX_LEN, trainable = False))
model1.add(Convolution1D(filters=nb_filter, kernel_size=filter_length, padding='same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Dropout(dropout))
model1.add(Convolution1D(filters=nb_filter, kernel_size=filter_length, padding='same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Dropout(dropout))
model1.add(Flatten())
model2 = Sequential()
model2.add(Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix], input_length = MAX_LEN, trainable = False))
model2.add(Convolution1D(filters=nb_filter, kernel_size=filter_length, padding='same'))
model2.add(BatchNormalization())
model2.add(Activation('relu'))
model2.add(Dropout(dropout))
model2.add(Convolution1D(filters=nb_filter, kernel_size=filter_length, padding='same'))
model2.add(BatchNormalization())
model2.add(Activation('relu'))
model2.add(Dropout(dropout))
model2.add(Flatten())
model3 = Sequential()
model3.add(Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix], input_length = MAX_LEN, trainable = False))
model3.add(TimeDistributed(Dense(EMBEDDING_DIM)))
model3.add(BatchNormalization())
model3.add(Activation('relu'))
model3.add(Dropout(dropout))
model3.add(Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, )))
model4 = Sequential()
model4.add(Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix], input_length = MAX_LEN, trainable = False))
model4.add(TimeDistributed(Dense(EMBEDDING_DIM)))
model4.add(BatchNormalization())
model4.add(Activation('relu'))
model4.add(Dropout(dropout))
model4.add(Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, )))
modela = Sequential()
modela.add(Merge([model1, model2], mode='concat'))
modela.add(Dense(units*2, kernel_initializer=weights, bias_initializer=bias))
modela.add(BatchNormalization())
modela.add(Activation('relu'))
modela.add(Dropout(dropout))
modela.add(Dense(units, kernel_initializer=weights, bias_initializer=bias))
modela.add(BatchNormalization())
modela.add(Activation('relu'))
modela.add(Dropout(dropout))
modelb = Sequential()
modelb.add(Merge([model3, model4], mode='concat'))
modelb.add(Dense(units*2, kernel_initializer=weights, bias_initializer=bias))
modelb.add(BatchNormalization())
modelb.add(Activation('relu'))
modelb.add(Dropout(dropout))
modelb.add(Dense(units, kernel_initializer=weights, bias_initializer=bias))
modelb.add(BatchNormalization())
modelb.add(Activation('relu'))
modelb.add(Dropout(dropout))
model = Sequential()
model.add(Merge([modela, modelb], mode='concat'))
model.add(Dense(units*2, kernel_initializer=weights, bias_initializer=bias))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout))
model.add(Dense(units, kernel_initializer=weights, bias_initializer=bias))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout))
model.add(Dense(units, kernel_initializer=weights, bias_initializer=bias))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout))
model.add(Dense(1, kernel_initializer=weights, bias_initializer=bias))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))
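The Merge layer used above is the old Keras 1 API and was removed in later Keras 2 releases. On newer versions the same branch-merging pattern can be written with the functional API; the sketch below mirrors the layer sizes above, reuses model1 through model4 as sub-models, and assumes q1 feeds the first branch of each pair and q2 the second (the dense_block helper and the adam optimizer are assumptions):

from keras.models import Model
from keras.layers import Input, Dense, BatchNormalization, Activation, Dropout, concatenate

q1 = Input(shape=(MAX_LEN,), name='q1')
q2 = Input(shape=(MAX_LEN,), name='q2')

# Apply the four branch models defined above to the two questions
f1, f2 = model1(q1), model2(q2)   # CNN branches
f3, f4 = model3(q1), model4(q2)   # TimeDistributed-Dense branches

def dense_block(x, n):
    # Dense -> BatchNorm -> ReLU -> Dropout, matching the blocks used above
    x = Dense(n, kernel_initializer=weights, bias_initializer=bias)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    return Dropout(dropout)(x)

a = dense_block(concatenate([f1, f2]), units * 2)   # replaces modela's Merge
a = dense_block(a, units)
b = dense_block(concatenate([f3, f4]), units * 2)   # replaces modelb's Merge
b = dense_block(b, units)

x = dense_block(concatenate([a, b]), units * 2)     # replaces the final Merge
x = dense_block(x, units)
x = dense_block(x, units)

out = Dense(1, kernel_initializer=weights, bias_initializer=bias)(x)
out = BatchNormalization()(out)
out = Activation('sigmoid')(out)

model = Model(inputs=[q1, q2], outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])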