In [1]:
%run init.ipynb
In [19]:
# Pad/truncate queries (text_left) to 10 tokens and documents (text_right) to 40; keep stop words.
preprocessor = mz.preprocessors.BasicPreprocessor(fixed_length_left=10, fixed_length_right=40, remove_stop_words=False)
In [20]:
train_pack_processed = preprocessor.fit_transform(train_pack_raw)
dev_pack_processed = preprocessor.transform(dev_pack_raw)
test_pack_processed = preprocessor.transform(test_pack_raw)
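fit_transform fits the vocabulary and length statistics on the training pack only, while the dev and test packs are transformed with the already-fitted units, so no information leaks from the evaluation splits. To eyeball the result, a processed pack can be viewed as a DataFrame (a quick sketch; after BasicPreprocessor each text_left entry should hold exactly 10 term ids and each text_right entry 40):

train_pack_processed.frame().head()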
In [21]:
preprocessor.context
Out[21]:
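For BasicPreprocessor, the context typically holds the fitted vocab_unit along with derived values such as vocab_size, embedding_input_dim, and input_shapes, which is what model.params.update(preprocessor.context) consumes below. The ranking_task used next is defined in init.ipynb; in the stock MatchZoo tutorials it is constructed roughly like this (a sketch, the exact metric list may differ):

ranking_task = mz.tasks.Ranking(loss=mz.losses.RankHingeLoss())
ranking_task.metrics = [
    mz.metrics.NormalizedDiscountedCumulativeGain(k=3),
    mz.metrics.NormalizedDiscountedCumulativeGain(k=5),
    mz.metrics.MeanAveragePrecision()
]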
In [22]:
model = mz.contrib.models.MatchLSTM()
model.params.update(preprocessor.context)  # input shapes, vocab size, embedding input dim
model.params['task'] = ranking_task
model.params['embedding_output_dim'] = 100  # matches the 100-d GloVe vectors loaded below
model.params['embedding_trainable'] = True
model.params['fc_num_units'] = 100
model.params['lstm_num_units'] = 100
model.params['dropout_rate'] = 0.5
model.params['optimizer'] = 'adadelta'
model.guess_and_fill_missing_params()  # fill any still-unset params with defaults
model.build()
model.compile()
print(model.params)
In [23]:
model.backend.summary()
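model.backend exposes the underlying Keras model, so any standard Keras utility applies to it directly, for example (a sketch, assuming pydot and graphviz are installed):

from keras.utils import plot_model
plot_model(model.backend, to_file='matchlstm.png', show_shapes=True)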
In [24]:
# Build a term-id -> vector matrix aligned with the fitted vocabulary, then load it
# into the model's embedding layer.
embedding_matrix = glove_embedding.build_matrix(preprocessor.context['vocab_unit'].state['term_index'])
model.load_embedding_matrix(embedding_matrix)
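glove_embedding itself comes from init.ipynb; MatchZoo ships a loader for it, roughly (a sketch; the dimension must agree with embedding_output_dim set above):

glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=100)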
In [25]:
test_x, test_y = test_pack_processed.unpack()
# batch_size should be the number of test samples; test_x is a dict of arrays,
# so len(test_x) would only count its fields.
evaluate = mz.callbacks.EvaluateAllMetrics(model, x=test_x, y=test_y, batch_size=len(test_y))
In [26]:
train_generator = mz.DataGenerator(
    train_pack_processed,
    mode='pair',   # emit (positive, negative) document pairs per query
    num_dup=2,     # duplicate each positive example twice
    num_neg=1,     # sample one negative per positive
    batch_size=20
)
print('num batches:', len(train_generator))
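In 'pair' mode the generator re-groups the training pack so that each positive document is paired with num_neg sampled negatives for the same query, duplicated num_dup times, which is the layout RankHingeLoss expects. Since DataGenerator is a Keras Sequence, a batch can be inspected by indexing (a sketch; with num_neg=1 each of the 20 pairs should yield one positive and one negative row):

x_batch, y_batch = train_generator[0]
print(y_batch.shape)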
In [27]:
history = model.fit_generator(train_generator, epochs=10, callbacks=[evaluate], workers=4, use_multiprocessing=True)
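Once training finishes, the model can be scored or used for prediction directly; both are standard MatchZoo model methods (a sketch reusing the unpacked test split from above):

print(model.evaluate(test_x, test_y))
predictions = model.predict(test_x)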
Use this function to update README.md with a better set of parameters. Make sure to delete the corresponding section of README.md first, since the function appends rather than replaces it.
In [29]:
append_params_to_readme(model)